From be68d63a139bd4a0eae44d06234eaff8c07d207c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 12 Jun 2024 19:19:36 -0400 Subject: ring-buffer: Add ring_buffer_alloc_range() In preparation to allowing the trace ring buffer to be allocated in a range of memory that is persistent across reboots, add ring_buffer_alloc_range(). It takes a contiguous range of memory and will split it up evenly for the per CPU ring buffers. If there's not enough memory to handle all CPUs with the minimum size, it will fail to allocate the ring buffer. Link: https://lkml.kernel.org/r/20240612232025.363998725@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Vincent Donnefort Cc: Joel Fernandes Cc: Daniel Bristot de Oliveira Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vineeth Pillai Cc: Youssef Esmat Cc: Beau Belgrave Cc: Alexander Graf Cc: Baoquan He Cc: Borislav Petkov Cc: "Paul E. McKenney" Cc: David Howells Cc: Mike Rapoport Cc: Dave Hansen Cc: Tony Luck Cc: Guenter Roeck Cc: Ross Zwisler Cc: Kees Cook Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 96d2140b471e..a50b0223b1d3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -89,6 +89,11 @@ void ring_buffer_discard_commit(struct trace_buffer *buffer, struct trace_buffer * __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); +struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, + int order, unsigned long start, + unsigned long range_size, + struct lock_class_key *key); + /* * Because the ring buffer is generic, if other users of the ring buffer get * traced by ftrace, it can produce lockdep warnings. We need to keep each @@ -100,6 +105,18 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +/* + * Because the ring buffer is generic, if other users of the ring buffer get + * traced by ftrace, it can produce lockdep warnings. We need to keep each + * ring buffer's lock class separate. + */ +#define ring_buffer_alloc_range(size, flags, order, start, range_size) \ +({ \ + static struct lock_class_key __key; \ + __ring_buffer_alloc_range((size), (flags), (order), (start), \ + (range_size), &__key); \ +}) + typedef bool (*ring_buffer_cond_fn)(void *data); int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, ring_buffer_cond_fn cond, void *data); -- cgit v1.2.3 From 7a1d1e4b9639ff08b2f42605c2950ae1ba4a43bf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 12 Jun 2024 19:19:44 -0400 Subject: tracing/ring-buffer: Add last_boot_info file to boot instance If an instance is mapped to memory on boot up, create a new file called "last_boot_info" that will hold information that can be used to properly parse the raw data in the ring buffer. It will export the delta of the addresses for text and data from what it was from the last boot. It does not expose actually addresses (unless you knew what the actual address was from the last boot). The output will look like: # cat last_boot_info text delta: -268435456 data delta: -268435456 The text and data are kept separate in case they are ever made different. 
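A trace parser running after the next boot can apply these deltas when resolving addresses that were recorded during the previous boot. A minimal userspace sketch, assuming the delta is defined as current-boot address minus last-boot address and has already been read from last_boot_info (the function and variable names below are illustrative, not part of the patch):

#include <stdio.h>

/* Map an address recorded in last boot's ring buffer into the current boot's
 * address space so it can be looked up in the current kernel's symbol table. */
static unsigned long map_old_text_addr(unsigned long old_addr, long text_delta)
{
        return old_addr + (unsigned long)text_delta;
}

int main(void)
{
        long text_delta = -268435456;                /* value read from last_boot_info */
        unsigned long old_ip = 0xffffffff90101230UL; /* address from last boot's trace */

        printf("current-boot address: %#lx\n", map_old_text_addr(old_ip, text_delta));
        return 0;
}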
Link: https://lkml.kernel.org/r/20240612232026.658680738@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Vincent Donnefort Cc: Joel Fernandes Cc: Daniel Bristot de Oliveira Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vineeth Pillai Cc: Youssef Esmat Cc: Beau Belgrave Cc: Alexander Graf Cc: Baoquan He Cc: Borislav Petkov Cc: "Paul E. McKenney" Cc: David Howells Cc: Mike Rapoport Cc: Dave Hansen Cc: Tony Luck Cc: Guenter Roeck Cc: Ross Zwisler Cc: Kees Cook Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index a50b0223b1d3..55de3798a9b9 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -94,6 +94,9 @@ struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flag unsigned long range_size, struct lock_class_key *key); +bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, + long *data); + /* * Because the ring buffer is generic, if other users of the ring buffer get * traced by ftrace, it can produce lockdep warnings. We need to keep each -- cgit v1.2.3 From 304b3f2bc07ba7c74a1ebc7917a8f0549e6b8d54 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:16 -1000 Subject: sched: Allow sched_cgroup_fork() to fail and introduce sched_cancel_fork() A new BPF extensible sched_class will need more control over the forking process. It wants to be able to fail from sched_cgroup_fork() after the new task's sched_task_group is initialized so that the loaded BPF program can prepare the task with its cgroup association established and reject fork if e.g. allocation fails. Allow sched_cgroup_fork() to fail by making it return int instead of void and adding sched_cancel_fork() to undo sched_fork() in the error path. sched_cgroup_fork() doesn't fail yet and this patch shouldn't cause any behavior changes. v2: Patch description updated to detail the expected use. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/task.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index d362aacf9f89..4df2f9055587 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -63,7 +63,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern void sched_cancel_fork(struct task_struct *p); extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); -- cgit v1.2.3 From 4f9c7ca851044273df5d67e00ca0b4d0476a48f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:16 -1000 Subject: sched: Factor out cgroup weight conversion functions Factor out sched_weight_from/to_cgroup() which convert between scheduler shares and cgroup weight. No functional change. The factored out functions will be used by a new BPF extensible sched_class so that the weights can be exposed to the BPF programs in a way which is consistent with cgroup weights and easier to interpret.
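For illustration, the conversion helpers could look roughly like the following; this is a sketch rather than the patch's code (the diff below only moves the constants), and it assumes CFS's nice-0 load weight of 1024 as the scheduler-side scale:

#include <linux/cgroup.h>
#include <linux/math.h>
#include <linux/minmax.h>

/* cgroup weight CGROUP_WEIGHT_DFL (100) maps to scheduler weight 1024 */
static inline unsigned long sched_weight_from_cgroup(unsigned long cgrp_weight)
{
        return DIV_ROUND_CLOSEST_ULL(cgrp_weight * 1024, CGROUP_WEIGHT_DFL);
}

static inline unsigned long sched_weight_to_cgroup(unsigned long weight)
{
        return clamp_t(unsigned long,
                       DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024),
                       CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
}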
The weight conversions will be used regardless of cgroup usage. It's just borrowing the cgroup weight range as it's more intuitive. CGROUP_WEIGHT_MIN/DFL/MAX constants are moved outside CONFIG_CGROUPS so that the conversion helpers can always be defined. v2: The helpers are now defined regardless of COFNIG_CGROUPS. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2150ca60394b..3cdaec701600 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -29,8 +29,6 @@ struct kernel_clone_args; -#ifdef CONFIG_CGROUPS - /* * All weight knobs on the default hierarchy should use the following min, * default and max values. The default value is the logarithmic center of @@ -40,6 +38,8 @@ struct kernel_clone_args; #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 +#ifdef CONFIG_CGROUPS + enum { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ -- cgit v1.2.3 From a7a9fc549293168c1edbc8433d5d4fbbe1171630 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:17 -1000 Subject: sched_ext: Add boilerplate for extensible scheduler class This adds dummy implementations of sched_ext interfaces which interact with the scheduler core and hook them in the correct places. As they're all dummies, this doesn't cause any behavior changes. This is split out to help reviewing. v2: balance_scx_on_up() dropped. This will be handled in sched_ext proper. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/sched/ext.h (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h new file mode 100644 index 000000000000..a05dfcf533b0 --- /dev/null +++ b/include/linux/sched/ext.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_EXT_H +#define _LINUX_SCHED_EXT_H + +#ifdef CONFIG_SCHED_CLASS_EXT +#error "NOT IMPLEMENTED YET" +#else /* !CONFIG_SCHED_CLASS_EXT */ + +static inline void sched_ext_free(struct task_struct *p) {} + +#endif /* CONFIG_SCHED_CLASS_EXT */ +#endif /* _LINUX_SCHED_EXT_H */ -- cgit v1.2.3 From f0e1a0643a59bf1f922fa209cec86a170b784f3f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:17 -1000 Subject: sched_ext: Implement BPF extensible scheduler class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a new scheduler class sched_ext (SCX), which allows scheduling policies to be implemented as BPF programs to achieve the following: 1. Ease of experimentation and exploration: Enabling rapid iteration of new scheduling policies. 2. Customization: Building application-specific schedulers which implement policies that are not applicable to general-purpose schedulers. 3. Rapid scheduler deployments: Non-disruptive swap outs of scheduling policies in production environments. sched_ext leverages BPF’s struct_ops feature to define a structure which exports function callbacks and flags to BPF programs that wish to implement scheduling policies. 
The struct_ops structure exported by sched_ext is struct sched_ext_ops, and is conceptually similar to struct sched_class. The role of sched_ext is to map the complex sched_class callbacks to the more simple and ergonomic struct sched_ext_ops callbacks. For more detailed discussion on the motivations and overview, please refer to the cover letter. Later patches will also add several example schedulers and documentation. This patch implements the minimum core framework to enable implementation of BPF schedulers. Subsequent patches will gradually add functionalities including safety guarantee mechanisms, nohz and cgroup support. include/linux/sched/ext.h defines struct sched_ext_ops. With the comment on top, each operation should be self-explanatory. The followings are worth noting: - Both "sched_ext" and its shorthand "scx" are used. If the identifier already has "sched" in it, "ext" is used; otherwise, "scx". - In sched_ext_ops, only .name is mandatory. Every operation is optional and if omitted a simple but functional default behavior is provided. - A new policy constant SCHED_EXT is added and a task can select sched_ext by invoking sched_setscheduler(2) with the new policy constant. However, if the BPF scheduler is not loaded, SCHED_EXT is the same as SCHED_NORMAL and the task is scheduled by CFS. When the BPF scheduler is loaded, all tasks which have the SCHED_EXT policy are switched to sched_ext. - To bridge the workflow imbalance between the scheduler core and sched_ext_ops callbacks, sched_ext uses simple FIFOs called dispatch queues (dsq's). By default, there is one global dsq (SCX_DSQ_GLOBAL), and one local per-CPU dsq (SCX_DSQ_LOCAL). SCX_DSQ_GLOBAL is provided for convenience and need not be used by a scheduler that doesn't require it. SCX_DSQ_LOCAL is the per-CPU FIFO that sched_ext pulls from when putting the next task on the CPU. The BPF scheduler can manage an arbitrary number of dsq's using scx_bpf_create_dsq() and scx_bpf_destroy_dsq(). - sched_ext guarantees system integrity no matter what the BPF scheduler does. To enable this, each task's ownership is tracked through p->scx.ops_state and all tasks are put on scx_tasks list. The disable path can always recover and revert all tasks back to CFS. See p->scx.ops_state and scx_tasks. - A task is not tied to its rq while enqueued. This decouples CPU selection from queueing and allows sharing a scheduling queue across an arbitrary subset of CPUs. This adds some complexities as a task may need to be bounced between rq's right before it starts executing. See dispatch_to_local_dsq() and move_task_to_local_dsq(). - One complication that arises from the above weak association between task and rq is that synchronizing with dequeue() gets complicated as dequeue() may happen anytime while the task is enqueued and the dispatch path might need to release the rq lock to transfer the task. Solving this requires a bit of complexity. See the logic around p->scx.sticky_cpu and p->scx.ops_qseq. - Both enable and disable paths are a bit complicated. The enable path switches all tasks without blocking to avoid issues which can arise from partially switched states (e.g. the switching task itself being starved). The disable path can't trust the BPF scheduler at all, so it also has to guarantee forward progress without blocking. See scx_ops_enable() and scx_ops_disable_workfn(). - When sched_ext is disabled, static_branches are used to shut down the entry points from hot paths. 
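To make the dispatch queue flow described in the notes above concrete, here is a minimal sketch of a BPF scheduler that relies on the default behavior for everything it doesn't implement. It is illustrative only and assumes the scx/common.bpf.h helper header shipped with the in-tree example schedulers:

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

/* Put every runnable task on the built-in global FIFO with the default slice.
 * Without a custom ops.dispatch(), the default behavior moves tasks from the
 * global DSQ to each CPU's local DSQ when the CPU needs something to run. */
void BPF_STRUCT_OPS(minimal_enqueue, struct task_struct *p, u64 enq_flags)
{
        scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}

SEC(".struct_ops.link")
struct sched_ext_ops minimal_ops = {
        .enqueue        = (void *)minimal_enqueue,
        .name           = "minimal",
};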
v7: - scx_ops_bypass() was incorrectly and unnecessarily trying to grab scx_ops_enable_mutex which can lead to deadlocks in the disable path. Fixed. - Fixed TASK_DEAD handling bug in scx_ops_enable() path which could lead to use-after-free. - Consolidated per-cpu variable usages and other cleanups. v6: - SCX_NR_ONLINE_OPS replaced with SCX_OPI_*_BEGIN/END so that multiple groups can be expressed. Later CPU hotplug operations are put into their own group. - SCX_OPS_DISABLING state is replaced with the new bypass mechanism which allows temporarily putting the system into simple FIFO scheduling mode bypassing the BPF scheduler. In addition to the shut down path, this will also be used to isolate the BPF scheduler across PM events. Enabling and disabling the bypass mode requires iterating all runnable tasks. rq->scx.runnable_list addition is moved from the later watchdog patch. - ops.prep_enable() is replaced with ops.init_task() and ops.enable/disable() are now called whenever the task enters and leaves sched_ext instead of when the task becomes schedulable on sched_ext and stops being so. A new operation - ops.exit_task() - is called when the task stops being schedulable on sched_ext. - scx_bpf_dispatch() can now be called from ops.select_cpu() too. This removes the need for communicating local dispatch decision made by ops.select_cpu() to ops.enqueue() via per-task storage. SCX_KF_SELECT_CPU is added to support the change. - SCX_TASK_ENQ_LOCAL which told the BPF scheudler that scx_select_cpu_dfl() wants the task to be dispatched to the local DSQ was removed. Instead, scx_bpf_select_cpu_dfl() now dispatches directly if it finds a suitable idle CPU. If such behavior is not desired, users can use scx_bpf_select_cpu_dfl() which returns the verdict in a bool out param. - scx_select_cpu_dfl() was mishandling WAKE_SYNC and could end up queueing many tasks on a local DSQ which makes tasks to execute in order while other CPUs stay idle which made some hackbench numbers really bad. Fixed. - The current state of sched_ext can now be monitored through files under /sys/sched_ext instead of /sys/kernel/debug/sched/ext. This is to enable monitoring on kernels which don't enable debugfs. - sched_ext wasn't telling BPF that ops.dispatch()'s @prev argument may be NULL and a BPF scheduler which derefs the pointer without checking could crash the kernel. Tell BPF. This is currently a bit ugly. A better way to annotate this is expected in the future. - scx_exit_info updated to carry pointers to message buffers instead of embedding them directly. This decouples buffer sizes from API so that they can be changed without breaking compatibility. - exit_code added to scx_exit_info. This is used to indicate different exit conditions on non-error exits and will be used to handle e.g. CPU hotplugs. - The patch "sched_ext: Allow BPF schedulers to switch all eligible tasks into sched_ext" is folded in and the interface is changed so that partial switching is indicated with a new ops flag %SCX_OPS_SWITCH_PARTIAL. This makes scx_bpf_switch_all() unnecessasry and in turn SCX_KF_INIT. ops.init() is now called with SCX_KF_SLEEPABLE. - Code reorganized so that only the parts necessary to integrate with the rest of the kernel are in the header files. - Changes to reflect the BPF and other kernel changes including the addition of bpf_sched_ext_ops.cfi_stubs. 
v5: - To accommodate 32bit configs, p->scx.ops_state is now atomic_long_t instead of atomic64_t and scx_dsp_buf_ent.qseq which uses load_acquire/store_release is now unsigned long instead of u64. - Fix the bug where bpf_scx_btf_struct_access() was allowing write access to arbitrary fields. - Distinguish kfuncs which can be called from any sched_ext ops and from anywhere. e.g. scx_bpf_pick_idle_cpu() can now be called only from sched_ext ops. - Rename "type" to "kind" in scx_exit_info to make it easier to use on languages in which "type" is a reserved keyword. - Since cff9b2332ab7 ("kernel/sched: Modify initial boot task idle setup"), PF_IDLE is not set on idle tasks which haven't been online yet which made scx_task_iter_next_filtered() include those idle tasks in iterations leading to oopses. Update scx_task_iter_next_filtered() to directly test p->sched_class against idle_sched_class instead of using is_idle_task() which tests PF_IDLE. - Other updates to match upstream changes such as adding const to set_cpumask() param and renaming check_preempt_curr() to wakeup_preempt(). v4: - SCHED_CHANGE_BLOCK replaced with the previous sched_deq_and_put_task()/sched_enq_and_set_tsak() pair. This is because upstream is adaopting a different generic cleanup mechanism. Once that lands, the code will be adapted accordingly. - task_on_scx() used to test whether a task should be switched into SCX, which is confusing. Renamed to task_should_scx(). task_on_scx() now tests whether a task is currently on SCX. - scx_has_idle_cpus is barely used anymore and replaced with direct check on the idle cpumask. - SCX_PICK_IDLE_CORE added and scx_pick_idle_cpu() improved to prefer fully idle cores. - ops.enable() now sees up-to-date p->scx.weight value. - ttwu_queue path is disabled for tasks on SCX to avoid confusing BPF schedulers expecting ->select_cpu() call. - Use cpu_smt_mask() instead of topology_sibling_cpumask() like the rest of the scheduler. v3: - ops.set_weight() added to allow BPF schedulers to track weight changes without polling p->scx.weight. - move_task_to_local_dsq() was losing SCX-specific enq_flags when enqueueing the task on the target dsq because it goes through activate_task() which loses the upper 32bit of the flags. Carry the flags through rq->scx.extra_enq_flags. - scx_bpf_dispatch(), scx_bpf_pick_idle_cpu(), scx_bpf_task_running() and scx_bpf_task_cpu() now use the new KF_RCU instead of KF_TRUSTED_ARGS to make it easier for BPF schedulers to call them. - The kfunc helper access control mechanism implemented through sched_ext_entity.kf_mask is improved. Now SCX_CALL_OP*() is always used when invoking scx_ops operations. v2: - balance_scx_on_up() is dropped. Instead, on UP, balance_scx() is called from put_prev_taks_scx() and pick_next_task_scx() as necessary. To determine whether balance_scx() should be called from put_prev_task_scx(), SCX_TASK_DEQD_FOR_SLEEP flag is added. See the comment in put_prev_task_scx() for details. - sched_deq_and_put_task() / sched_enq_and_set_task() sequences replaced with SCHED_CHANGE_BLOCK(). - Unused all_dsqs list removed. This was a left-over from previous iterations. - p->scx.kf_mask is added to track and enforce which kfunc helpers are allowed. Also, init/exit sequences are updated to make some kfuncs always safe to call regardless of the current BPF scheduler state. Combined, this should make all the kfuncs safe. - BPF now supports sleepable struct_ops operations. Hacky workaround removed and operations and kfunc helpers are tagged appropriately. 
- BPF now supports bitmask / cpumask helpers. scx_bpf_get_idle_cpumask() and friends are added so that BPF schedulers can use the idle masks with the generic helpers. This replaces the hacky kfunc helpers added by a separate patch in V1. - CONFIG_SCHED_CLASS_EXT can no longer be enabled if SCHED_CORE is enabled. This restriction will be removed by a later patch which adds core-sched support. - Add MAINTAINERS entries and other misc changes. Signed-off-by: Tejun Heo Co-authored-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden Cc: Andrea Righi --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/sched.h | 5 ++ include/linux/sched/ext.h | 141 +++++++++++++++++++++++++++++++++++++- include/uapi/linux/sched.h | 1 + 4 files changed, 147 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5703526d6ebf..2e712183ba09 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -133,6 +133,7 @@ *(__dl_sched_class) \ *(__rt_sched_class) \ *(__fair_sched_class) \ + *(__ext_sched_class) \ *(__idle_sched_class) \ __sched_class_lowest = .; diff --git a/include/linux/sched.h b/include/linux/sched.h index 90691d99027e..06beb8a6e0ca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -80,6 +80,8 @@ struct task_group; struct task_struct; struct user_event_mm; +#include + /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). @@ -802,6 +804,9 @@ struct task_struct { struct sched_rt_entity rt; struct sched_dl_entity dl; struct sched_dl_entity *dl_server; +#ifdef CONFIG_SCHED_CLASS_EXT + struct sched_ext_entity scx; +#endif const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index a05dfcf533b0..c1530a7992cc 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -1,9 +1,148 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2022 Tejun Heo + * Copyright (c) 2022 David Vernet + */ #ifndef _LINUX_SCHED_EXT_H #define _LINUX_SCHED_EXT_H #ifdef CONFIG_SCHED_CLASS_EXT -#error "NOT IMPLEMENTED YET" + +#include +#include + +enum scx_public_consts { + SCX_OPS_NAME_LEN = 128, + + SCX_SLICE_DFL = 20 * 1000000, /* 20ms */ +}; + +/* + * DSQ (dispatch queue) IDs are 64bit of the format: + * + * Bits: [63] [62 .. 0] + * [ B] [ ID ] + * + * B: 1 for IDs for built-in DSQs, 0 for ops-created user DSQs + * ID: 63 bit ID + * + * Built-in IDs: + * + * Bits: [63] [62] [61..32] [31 .. 0] + * [ 1] [ L] [ R ] [ V ] + * + * 1: 1 for built-in DSQs. + * L: 1 for LOCAL_ON DSQ IDs, 0 for others + * V: For LOCAL_ON DSQ IDs, a CPU number. For others, a pre-defined value. + */ +enum scx_dsq_id_flags { + SCX_DSQ_FLAG_BUILTIN = 1LLU << 63, + SCX_DSQ_FLAG_LOCAL_ON = 1LLU << 62, + + SCX_DSQ_INVALID = SCX_DSQ_FLAG_BUILTIN | 0, + SCX_DSQ_GLOBAL = SCX_DSQ_FLAG_BUILTIN | 1, + SCX_DSQ_LOCAL = SCX_DSQ_FLAG_BUILTIN | 2, + SCX_DSQ_LOCAL_ON = SCX_DSQ_FLAG_BUILTIN | SCX_DSQ_FLAG_LOCAL_ON, + SCX_DSQ_LOCAL_CPU_MASK = 0xffffffffLLU, +}; + +/* + * Dispatch queue (dsq) is a simple FIFO which is used to buffer between the + * scheduler core and the BPF scheduler. See the documentation for more details. 
+ */ +struct scx_dispatch_q { + raw_spinlock_t lock; + struct list_head list; /* tasks in dispatch order */ + u32 nr; + u64 id; + struct rhash_head hash_node; + struct llist_node free_node; + struct rcu_head rcu; +}; + +/* scx_entity.flags */ +enum scx_ent_flags { + SCX_TASK_QUEUED = 1 << 0, /* on ext runqueue */ + SCX_TASK_BAL_KEEP = 1 << 1, /* balance decided to keep current */ + SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */ + SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */ + + SCX_TASK_STATE_SHIFT = 8, /* bit 8 and 9 are used to carry scx_task_state */ + SCX_TASK_STATE_BITS = 2, + SCX_TASK_STATE_MASK = ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT, + + SCX_TASK_CURSOR = 1 << 31, /* iteration cursor, not a task */ +}; + +/* scx_entity.flags & SCX_TASK_STATE_MASK */ +enum scx_task_state { + SCX_TASK_NONE, /* ops.init_task() not called yet */ + SCX_TASK_INIT, /* ops.init_task() succeeded, but task can be cancelled */ + SCX_TASK_READY, /* fully initialized, but not in sched_ext */ + SCX_TASK_ENABLED, /* fully initialized and in sched_ext */ + + SCX_TASK_NR_STATES, +}; + +/* + * Mask bits for scx_entity.kf_mask. Not all kfuncs can be called from + * everywhere and the following bits track which kfunc sets are currently + * allowed for %current. This simple per-task tracking works because SCX ops + * nest in a limited way. BPF will likely implement a way to allow and disallow + * kfuncs depending on the calling context which will replace this manual + * mechanism. See scx_kf_allow(). + */ +enum scx_kf_mask { + SCX_KF_UNLOCKED = 0, /* not sleepable, not rq locked */ + /* all non-sleepables may be nested inside SLEEPABLE */ + SCX_KF_SLEEPABLE = 1 << 0, /* sleepable init operations */ + /* ops.dequeue (in REST) may be nested inside DISPATCH */ + SCX_KF_DISPATCH = 1 << 2, /* ops.dispatch() */ + SCX_KF_ENQUEUE = 1 << 3, /* ops.enqueue() and ops.select_cpu() */ + SCX_KF_SELECT_CPU = 1 << 4, /* ops.select_cpu() */ + SCX_KF_REST = 1 << 5, /* other rq-locked operations */ + + __SCX_KF_RQ_LOCKED = SCX_KF_DISPATCH | + SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, +}; + +/* + * The following is embedded in task_struct and contains all fields necessary + * for a task to be scheduled by SCX. + */ +struct sched_ext_entity { + struct scx_dispatch_q *dsq; + struct list_head dsq_node; + u32 flags; /* protected by rq lock */ + u32 weight; + s32 sticky_cpu; + s32 holding_cpu; + u32 kf_mask; /* see scx_kf_mask above */ + atomic_long_t ops_state; + + struct list_head runnable_node; /* rq->scx.runnable_list */ + + u64 ddsp_dsq_id; + u64 ddsp_enq_flags; + + /* BPF scheduler modifiable fields */ + + /* + * Runtime budget in nsecs. This is usually set through + * scx_bpf_dispatch() but can also be modified directly by the BPF + * scheduler. Automatically decreased by SCX as the task executes. On + * depletion, a scheduling event is triggered. 
+ */ + u64 slice; + + /* cold fields */ + /* must be the last field, see init_scx_entity() */ + struct list_head tasks_node; +}; + +void sched_ext_free(struct task_struct *p); + #else /* !CONFIG_SCHED_CLASS_EXT */ static inline void sched_ext_free(struct task_struct *p) {} diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 3bac0a8ceab2..359a14cc76a4 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -118,6 +118,7 @@ struct clone_args { /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 #define SCHED_DEADLINE 6 +#define SCHED_EXT 7 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ #define SCHED_RESET_ON_FORK 0x40000000 -- cgit v1.2.3 From 8a010b81b3a50b033fc3cddc613517abda586cbe Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 18 Jun 2024 10:09:18 -1000 Subject: sched_ext: Implement runnable task stall watchdog The most common and critical way that a BPF scheduler can misbehave is by failing to run runnable tasks for too long. This patch implements a watchdog. * All tasks record when they become runnable. * A watchdog work periodically scans all runnable tasks. If any task has stayed runnable for too long, the BPF scheduler is aborted. * scheduler_tick() monitors whether the watchdog itself is stuck. If so, the BPF scheduler is aborted. Because the watchdog only scans the tasks which are currently runnable and usually very infrequently, the overhead should be negligible. scx_qmap is updated so that it can be told to stall user and/or kernel tasks. A detected task stall looks like the following: sched_ext: BPF scheduler "qmap" errored, disabling sched_ext: runnable task stall (dbus-daemon[953] failed to run for 6.478s) scx_check_timeout_workfn+0x10e/0x1b0 process_one_work+0x287/0x560 worker_thread+0x234/0x420 kthread+0xe9/0x100 ret_from_fork+0x1f/0x30 A detected watchdog stall: sched_ext: BPF scheduler "qmap" errored, disabling sched_ext: runnable task stall (watchdog failed to check in for 5.001s) scheduler_tick+0x2eb/0x340 update_process_times+0x7a/0x90 tick_sched_timer+0xd8/0x130 __hrtimer_run_queues+0x178/0x3b0 hrtimer_interrupt+0xfc/0x390 __sysvec_apic_timer_interrupt+0xb7/0x2b0 sysvec_apic_timer_interrupt+0x90/0xb0 asm_sysvec_apic_timer_interrupt+0x1b/0x20 default_idle+0x14/0x20 arch_cpu_idle+0xf/0x20 default_idle_call+0x50/0x90 do_idle+0xe8/0x240 cpu_startup_entry+0x1d/0x20 kernel_init+0x0/0x190 start_kernel+0x0/0x392 start_kernel+0x324/0x392 x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x104/0x109 secondary_startup_64_no_verify+0xce/0xdb Note that this patch exposes scx_ops_error[_type]() in kernel/sched/ext.h to inline scx_notify_sched_tick(). v4: - While disabling, cancel_delayed_work_sync(&scx_watchdog_work) was being called before forward progress was guaranteed and thus could lead to system lockup. Relocated. - While enabling, it was comparing msecs against jiffies without conversion leading to spurious load failures on lower HZ kernels. Fixed. - runnable list management is now used by core bypass logic and moved to the patch implementing sched_ext core. v3: - bpf_scx_init_member() was incorrectly comparing ops->timeout_ms against SCX_WATCHDOG_MAX_TIMEOUT which is in jiffies without conversion leading to spurious load failures in lower HZ kernels. Fixed. v2: - Julia Lawall noticed that the watchdog code was mixing msecs and jiffies. Fix by using jiffies for everything. 
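The core of the check is a jiffies comparison against the recorded runnable timestamp. A hedged sketch of the idea, not the kernel's exact code:

#include <linux/jiffies.h>
#include <linux/sched.h>
#include <linux/sched/ext.h>

/* A task is considered stalled if it has been runnable (p->scx.runnable_at)
 * for longer than the configured timeout without getting to run. */
static bool scx_task_stalled(struct task_struct *p, unsigned long timeout_jiffies)
{
        return time_after(jiffies, p->scx.runnable_at + timeout_jiffies);
}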
Signed-off-by: David Vernet Reviewed-by: Tejun Heo Signed-off-by: Tejun Heo Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden Cc: Julia Lawall --- include/linux/sched/ext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index c1530a7992cc..96031252436f 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -122,6 +122,7 @@ struct sched_ext_entity { atomic_long_t ops_state; struct list_head runnable_node; /* rq->scx.runnable_list */ + unsigned long runnable_at; u64 ddsp_dsq_id; u64 ddsp_enq_flags; -- cgit v1.2.3 From 7bb6f0810ecfb73a9d7a2ca56fb001e0201a6758 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:18 -1000 Subject: sched_ext: Allow BPF schedulers to disallow specific tasks from joining SCHED_EXT BPF schedulers might not want to schedule certain tasks - e.g. kernel threads. This patch adds p->scx.disallow which can be set by BPF schedulers in such cases. The field can be changed anytime and setting it in ops.prep_enable() guarantees that the task can never be scheduled by sched_ext. scx_qmap is updated with the -d option to disallow a specific PID: # echo $$ 1092 # grep -E '(policy)|(ext\.enabled)' /proc/self/sched policy : 0 ext.enabled : 0 # ./set-scx 1092 # grep -E '(policy)|(ext\.enabled)' /proc/self/sched policy : 7 ext.enabled : 0 Run "scx_qmap -p -d 1092" in another terminal. # cat /sys/kernel/sched_ext/nr_rejected 1 # grep -E '(policy)|(ext\.enabled)' /proc/self/sched policy : 0 ext.enabled : 0 # ./set-scx 1092 setparam failed for 1092 (Permission denied) - v4: Refreshed on top of tip:sched/core. - v3: Update description to reflect /sys/kernel/sched_ext interface change. - v2: Use atomic_long_t instead of atomic64_t for scx_kick_cpus_pnt_seqs to accommodate 32bit archs. Signed-off-by: Tejun Heo Suggested-by: Barret Rhoden Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 96031252436f..ea7c501ac819 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -137,6 +137,18 @@ struct sched_ext_entity { */ u64 slice; + /* + * If set, reject future sched_setscheduler(2) calls updating the policy + * to %SCHED_EXT with -%EACCES. + * + * If set from ops.init_task() and the task's policy is already + * %SCHED_EXT, which can happen while the BPF scheduler is being loaded + * or by inhering the parent's policy during fork, the task's policy is + * rejected and forcefully reverted to %SCHED_NORMAL. The number of + * such events are reported through /sys/kernel/debug/sched_ext::nr_rejected. + */ + bool disallow; /* reject switching into SCX */ + /* cold fields */ /* must be the last field, see init_scx_entity() */ struct list_head tasks_node; -- cgit v1.2.3 From 1538e33995eaf3f315cbb5506019b9f913ed8555 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 18 Jun 2024 10:09:18 -1000 Subject: sched_ext: Print sched_ext info when dumping stack It would be useful to see what the sched_ext scheduler state is, and what scheduler is running, when we're dumping a task's stack. This patch therefore adds a new print_scx_info() function that's called in the same context as print_worker_info() and print_stop_info(). An example dump follows. 
BUG: kernel NULL pointer dereference, address: 0000000000000999 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP CPU: 13 PID: 2047 Comm: insmod Tainted: G O 6.6.0-work-10323-gb58d4cae8e99-dirty #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS unknown 2/2/2022 Sched_ext: qmap (enabled+all), task: runnable_at=-17ms RIP: 0010:init_module+0x9/0x1000 [test_module] ... v3: - scx_ops_enable_state_str[] definition moved to an earlier patch as it's now used by core implementation. - Convert jiffy delta to msecs using jiffies_to_msecs() instead of multiplying by (HZ / MSEC_PER_SEC). The conversion is implemented in jiffies_delta_msecs(). v2: - We are now using scx_ops_enable_state_str[] outside CONFIG_SCHED_DEBUG. Move it outside of CONFIG_SCHED_DEBUG and to the top. This was reported by Changwoo and Andrea. Signed-off-by: David Vernet Reported-by: Changwoo Min Reported-by: Andrea Righi Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index ea7c501ac819..85fb5dc725ef 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -155,10 +155,12 @@ struct sched_ext_entity { }; void sched_ext_free(struct task_struct *p); +void print_scx_info(const char *log_lvl, struct task_struct *p); #else /* !CONFIG_SCHED_CLASS_EXT */ static inline void sched_ext_free(struct task_struct *p) {} +static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {} #endif /* CONFIG_SCHED_CLASS_EXT */ #endif /* _LINUX_SCHED_EXT_H */ -- cgit v1.2.3 From 07814a9439a3b03d79a1001614b5bc1cab69bcec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:18 -1000 Subject: sched_ext: Print debug dump after an error exit If a BPF scheduler triggers an error, the scheduler is aborted and the system is reverted to the built-in scheduler. In the process, a lot of information which may be useful for figuring out what happened can be lost. This patch adds debug dump which captures information which may be useful for debugging including runqueue and runnable thread states at the time of failure. 
The following shows a debug dump after triggering the watchdog: root@test ~# os/work/tools/sched_ext/build/bin/scx_qmap -t 100 stats : enq=1 dsp=0 delta=1 deq=0 stats : enq=90 dsp=90 delta=0 deq=0 stats : enq=156 dsp=156 delta=0 deq=0 stats : enq=218 dsp=218 delta=0 deq=0 stats : enq=255 dsp=255 delta=0 deq=0 stats : enq=271 dsp=271 delta=0 deq=0 stats : enq=284 dsp=284 delta=0 deq=0 stats : enq=293 dsp=293 delta=0 deq=0 DEBUG DUMP ================================================================================ kworker/u32:12[320] triggered exit kind 1026: runnable task stall (stress[1530] failed to run for 6.841s) Backtrace: scx_watchdog_workfn+0x136/0x1c0 process_scheduled_works+0x2b5/0x600 worker_thread+0x269/0x360 kthread+0xeb/0x110 ret_from_fork+0x36/0x40 ret_from_fork_asm+0x1a/0x30 QMAP FIFO[0]: QMAP FIFO[1]: QMAP FIFO[2]: 1436 QMAP FIFO[3]: QMAP FIFO[4]: CPU states ---------- CPU 0 : nr_run=1 ops_qseq=244 curr=swapper/0[0] class=idle_sched_class QMAP: dsp_idx=1 dsp_cnt=0 R stress[1530] -6841ms scx_state/flags=3/0x1 ops_state/qseq=2/20 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=0 asm_sysvec_apic_timer_interrupt+0x16/0x20 CPU 2 : nr_run=2 ops_qseq=142 curr=swapper/2[0] class=idle_sched_class QMAP: dsp_idx=1 dsp_cnt=0 R sshd[1703] -5905ms scx_state/flags=3/0x9 ops_state/qseq=2/88 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=1 __x64_sys_ppoll+0xf6/0x120 do_syscall_64+0x7b/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e R fish[1539] -4141ms scx_state/flags=3/0x9 ops_state/qseq=2/124 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=1 futex_wait+0x60/0xe0 do_futex+0x109/0x180 __x64_sys_futex+0x117/0x190 do_syscall_64+0x7b/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e CPU 3 : nr_run=2 ops_qseq=162 curr=kworker/u32:12[320] class=ext_sched_class QMAP: dsp_idx=1 dsp_cnt=0 *R kworker/u32:12[320] +0ms scx_state/flags=3/0xd ops_state/qseq=0/0 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=0 scx_dump_state+0x613/0x6f0 scx_ops_error_irq_workfn+0x1f/0x40 irq_work_run_list+0x82/0xd0 irq_work_run+0x14/0x30 __sysvec_irq_work+0x40/0x140 sysvec_irq_work+0x60/0x70 asm_sysvec_irq_work+0x16/0x20 scx_watchdog_workfn+0x15f/0x1c0 process_scheduled_works+0x2b5/0x600 worker_thread+0x269/0x360 kthread+0xeb/0x110 ret_from_fork+0x36/0x40 ret_from_fork_asm+0x1a/0x30 R kworker/3:2[1436] +0ms scx_state/flags=3/0x9 ops_state/qseq=2/160 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=08 QMAP: force_local=0 kthread+0xeb/0x110 ret_from_fork+0x36/0x40 ret_from_fork_asm+0x1a/0x30 CPU 7 : nr_run=0 ops_qseq=76 curr=swapper/7[0] class=idle_sched_class ================================================================================ EXIT: runnable task stall (stress[1530] failed to run for 6.841s) It shows that CPU 3 was running the watchdog when it triggered the error condition and the scx_qmap thread has been queued on CPU 0 for over 5 seconds but failed to run. It also prints out scx_qmap specific information - e.g. which tasks are queued on each FIFO and so on using the dump_*() ops. This dump has proved pretty useful for developing and debugging BPF schedulers. Debug dump is generated automatically when the BPF scheduler exits due to an error. The debug buffer used in such cases is determined by sched_ext_ops.exit_dump_len and defaults to 32k. If the debug dump overruns the available buffer, the output is truncated and marked accordingly. Debug dump output can also be read through the sched_ext_dump tracepoint. 
When read through the tracepoint, there is no length limit. SysRq-D can be used to trigger debug dump at any time while a BPF scheduler is loaded. This is non-destructive - the scheduler keeps running afterwards. The output can be read through the sched_ext_dump tracepoint. v2: - The size of exit debug dump buffer can now be customized using sched_ext_ops.exit_dump_len. - sched_ext_ops.dump*() added to enable dumping of BPF scheduler specific information. - Tracpoint output and SysRq-D triggering added. Signed-off-by: Tejun Heo Reviewed-by: David Vernet --- include/trace/events/sched_ext.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/trace/events/sched_ext.h (limited to 'include') diff --git a/include/trace/events/sched_ext.h b/include/trace/events/sched_ext.h new file mode 100644 index 000000000000..fe19da7315a9 --- /dev/null +++ b/include/trace/events/sched_ext.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched_ext + +#if !defined(_TRACE_SCHED_EXT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_EXT_H + +#include + +TRACE_EVENT(sched_ext_dump, + + TP_PROTO(const char *line), + + TP_ARGS(line), + + TP_STRUCT__entry( + __string(line, line) + ), + + TP_fast_assign( + __assign_str(line); + ), + + TP_printk("%s", + __get_str(line) + ) +); + +#endif /* _TRACE_SCHED_EXT_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 81aae789181b5850d77dfdf74d4b85c63f0705e9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:19 -1000 Subject: sched_ext: Implement scx_bpf_kick_cpu() and task preemption support It's often useful to wake up and/or trigger reschedule on other CPUs. This patch adds scx_bpf_kick_cpu() kfunc helper that BPF scheduler can call to kick the target CPU into the scheduling path. As a sched_ext task relinquishes its CPU only after its slice is depleted, this patch also adds SCX_KICK_PREEMPT and SCX_ENQ_PREEMPT which clears the slice of the target CPU's current task to guarantee that sched_ext's scheduling path runs on the CPU. If SCX_KICK_IDLE is specified, the target CPU is kicked iff the CPU is idle to guarantee that the target CPU will go through at least one full sched_ext scheduling cycle after the kicking. This can be used to wake up idle CPUs without incurring unnecessary overhead if it isn't currently idle. As a demonstration of how backward compatibility can be supported using BPF CO-RE, tools/sched_ext/include/scx/compat.bpf.h is added. It provides __COMPAT_scx_bpf_kick_cpu_IDLE() which uses SCX_KICK_IDLE if available or becomes a regular kicking otherwise. This allows schedulers to use the new SCX_KICK_IDLE while maintaining support for older kernels. The plan is to temporarily use compat helpers to ease API updates and drop them after a few kernel releases. v5: - SCX_KICK_IDLE added. Note that this also adds a compat mechanism for schedulers so that they can support kernels without SCX_KICK_IDLE. This is useful as a demonstration of how new feature flags can be added in a backward compatible way. - kick_cpus_irq_workfn() reimplemented so that it touches the pending cpumasks only as necessary to reduce kicking overhead on machines with a lot of CPUs. - tools/sched_ext/include/scx/compat.bpf.h added. v4: - Move example scheduler to its own patch. v3: - Make scx_example_central switch all tasks by default. - Convert to BPF inline iterators. 
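As a sketch of the CO-RE pattern used by the compat header (hedged; the in-tree macro body may differ), a scheduler can probe for the new flag and fall back to a plain kick on kernels that don't know it:

#include <scx/common.bpf.h>
#include <bpf/bpf_core_read.h>

/* Kick @cpu only if it is idle when the running kernel knows SCX_KICK_IDLE,
 * otherwise fall back to an unconditional kick. */
static inline void compat_kick_cpu_idle(s32 cpu)
{
        if (bpf_core_enum_value_exists(enum scx_kick_flags, SCX_KICK_IDLE))
                scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
        else
                scx_bpf_kick_cpu(cpu, 0);
}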
v2: - Julia Lawall reported that scx_example_central can overflow the dispatch buffer and malfunction. As scheduling for other CPUs can't be handled by the automatic retry mechanism, fix by implementing an explicit overflow and retry handling. - Updated to use generic BPF cpumask helpers. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 85fb5dc725ef..3b2809b980ac 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -134,6 +134,10 @@ struct sched_ext_entity { * scx_bpf_dispatch() but can also be modified directly by the BPF * scheduler. Automatically decreased by SCX as the task executes. On * depletion, a scheduling event is triggered. + * + * This value is cleared to zero if the task is preempted by + * %SCX_KICK_PREEMPT and shouldn't be used to determine how long the + * task ran. Use p->se.sum_exec_runtime instead. */ u64 slice; -- cgit v1.2.3 From 22a920209ab6aa4f8ec960ed81041643fddeaec6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:19 -1000 Subject: sched_ext: Implement tickless support Allow BPF schedulers to indicate tickless operation by setting p->scx.slice to SCX_SLICE_INF. A CPU whose current task has an infinite slice goes into tickless operation. scx_central is updated to use tickless operations for all tasks and instead use a BPF timer to expire slices. This also uses the SCX_ENQ_PREEMPT and task state tracking added by the previous patches. Currently, there is no way to pin the timer on the central CPU, so it may end up on one of the worker CPUs; however, outside of that, the worker CPUs can go tickless both while running sched_ext tasks and idling. With schbench running, scx_central shows: root@test ~# grep ^LOC /proc/interrupts; sleep 10; grep ^LOC /proc/interrupts LOC: 142024 656 664 449 Local timer interrupts LOC: 161663 663 665 449 Local timer interrupts Without it: root@test ~ [SIGINT]# grep ^LOC /proc/interrupts; sleep 10; grep ^LOC /proc/interrupts LOC: 188778 3142 3793 3993 Local timer interrupts LOC: 198993 5314 6323 6438 Local timer interrupts While scx_central itself is too barebones to be useful as a production scheduler, a more featureful central scheduler can be built using the same approach. Google's experience shows that such an approach can have significant benefits for certain applications such as VM hosting. v4: Allow operation even if BPF_F_TIMER_CPU_PIN is not available. v3: Pin the central scheduler's timer on the central_cpu using BPF_F_TIMER_CPU_PIN. v2: Convert to BPF inline iterators.
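In a BPF scheduler, the tickless mode boils down to dispatching with an infinite slice; a hedged sketch along the lines of what scx_central does (illustrative only, assuming scx/common.bpf.h):

#include <scx/common.bpf.h>

/* Dispatch with an infinite slice so the CPU can stop its tick; a central BPF
 * timer is then expected to end slices by kicking CPUs with scx_bpf_kick_cpu()
 * from the previous patch. */
void BPF_STRUCT_OPS(central_enqueue, struct task_struct *p, u64 enq_flags)
{
        scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_INF, enq_flags);
}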
Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 3b2809b980ac..6f1a4977e9f8 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -16,6 +16,7 @@ enum scx_public_consts { SCX_OPS_NAME_LEN = 128, SCX_SLICE_DFL = 20 * 1000000, /* 20ms */ + SCX_SLICE_INF = U64_MAX, /* infinite, implies nohz */ }; /* -- cgit v1.2.3 From 36454023f50b2aadd25b7f47c50b5edc0c59e1c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:19 -1000 Subject: sched_ext: Track tasks that are subjects of the in-flight SCX operation When some SCX operations are in flight, it is known that the subject task's rq lock is held throughout which makes it safe to access certain fields of the task - e.g. its current task_group. We want to add SCX kfunc helpers that can make use of this guarantee - e.g. to help determine the currently associated CPU cgroup from the task's current task_group. As it'd be dangerous to call such a helper on a task which isn't rq lock protected, the helper should be able to verify the input task and reject accordingly. This patch adds sched_ext_entity.kf_tasks[] which tracks the tasks which are currently being operated on by a terminal SCX operation. The new SCX_CALL_OP_[2]TASK[_RET]() can be used when invoking SCX operations which take tasks as arguments and the scx_kf_allowed_on_arg_tasks() can be used by kfunc helpers to verify the input task status. Note that as sched_ext_entity.kf_tasks[] can't handle nesting, the tracking is currently only limited to terminal SCX operations. If needed in the future, this restriction can be removed by moving the tracking to the task side with a couple per-task counters. v2: Updated to reflect the addition of SCX_KF_SELECT_CPU. Signed-off-by: Tejun Heo Reviewed-by: David Vernet --- include/linux/sched/ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 6f1a4977e9f8..74341dbc6a19 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -106,6 +106,7 @@ enum scx_kf_mask { __SCX_KF_RQ_LOCKED = SCX_KF_DISPATCH | SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, + __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; /* @@ -120,6 +121,7 @@ struct sched_ext_entity { s32 sticky_cpu; s32 holding_cpu; u32 kf_mask; /* see scx_kf_mask above */ + struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */ atomic_long_t ops_state; struct list_head runnable_node; /* rq->scx.runnable_list */ -- cgit v1.2.3 From 245254f7081dbe1c8da54675d0e4ddbe74cee61b Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 18 Jun 2024 10:09:20 -1000 Subject: sched_ext: Implement sched_ext_ops.cpu_acquire/release() Scheduler classes are strictly ordered and when a higher priority class has tasks to run, the lower priority ones lose access to the CPU. Being able to monitor and act on these events is necessary for use cases including strict core-scheduling and latency management. This patch adds two operations ops.cpu_acquire() and .cpu_release(). The former is invoked when a CPU becomes available to the BPF scheduler and the opposite for the latter.
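A hedged sketch of how a BPF scheduler might wire up the two callbacks, using the re-enqueue helper this patch also adds (introduced next); illustrative only, assuming scx/common.bpf.h:

#include <scx/common.bpf.h>

/* A higher-priority sched class took the CPU: push the tasks still sitting on
 * its local DSQ back through ops.enqueue() so they can go to other CPUs. */
void BPF_STRUCT_OPS(sketch_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
{
        scx_bpf_reenqueue_local();
}

/* The CPU is available to the BPF scheduler again. */
void BPF_STRUCT_OPS(sketch_cpu_acquire, s32 cpu, struct scx_cpu_acquire_args *args)
{
}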
This patch also implements scx_bpf_reenqueue_local() which can be called from .cpu_release() to trigger requeueing of all tasks in the local dsq of the CPU so that the tasks can be reassigned to other available CPUs. scx_pair is updated to use .cpu_acquire/release() along with %SCX_KICK_WAIT to make the pair scheduling guarantee strict even when a CPU is preempted by a higher priority scheduler class. scx_qmap is updated to use .cpu_acquire/release() to empty the local dsq of a preempted CPU. A similar approach can be adopted by BPF schedulers that want to have a tight control over latency. v4: Use the new SCX_KICK_IDLE to wake up a CPU after re-enqueueing. v3: Drop the const qualifier from scx_cpu_release_args.task. BPF enforces access control through the verifier, so the qualifier isn't actually operative and only gets in the way when interacting with various helpers. v2: Add p->scx.kf_mask annotation to allow calling scx_bpf_reenqueue_local() from ops.cpu_release() nested inside ops.init() and other sleepable operations. Signed-off-by: David Vernet Reviewed-by: Tejun Heo Signed-off-by: Tejun Heo Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 74341dbc6a19..21c627337e01 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -98,13 +98,15 @@ enum scx_kf_mask { SCX_KF_UNLOCKED = 0, /* not sleepable, not rq locked */ /* all non-sleepables may be nested inside SLEEPABLE */ SCX_KF_SLEEPABLE = 1 << 0, /* sleepable init operations */ + /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */ + SCX_KF_CPU_RELEASE = 1 << 1, /* ops.cpu_release() */ /* ops.dequeue (in REST) may be nested inside DISPATCH */ SCX_KF_DISPATCH = 1 << 2, /* ops.dispatch() */ SCX_KF_ENQUEUE = 1 << 3, /* ops.enqueue() and ops.select_cpu() */ SCX_KF_SELECT_CPU = 1 << 4, /* ops.select_cpu() */ SCX_KF_REST = 1 << 5, /* other rq-locked operations */ - __SCX_KF_RQ_LOCKED = SCX_KF_DISPATCH | + __SCX_KF_RQ_LOCKED = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH | SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; -- cgit v1.2.3 From 7b0888b7cc1924b74ce660e02f6211df8dd46e7b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:20 -1000 Subject: sched_ext: Implement core-sched support The core-sched support is composed of the following parts: - task_struct->scx.core_sched_at is added. This is a timestamp which can be used to order tasks. Depending on whether the BPF scheduler implements custom ordering, it tracks either global FIFO ordering of all tasks or local-DSQ ordering within the dispatched tasks on a CPU. - prio_less() is updated to call scx_prio_less() when comparing SCX tasks. scx_prio_less() calls ops.core_sched_before() if available or uses the core_sched_at timestamp. For global FIFO ordering, the BPF scheduler doesn't need to do anything. Otherwise, it should implement ops.core_sched_before() which reflects the ordering. - When core-sched is enabled, balance_scx() balances all SMT siblings so that they all have tasks dispatched if necessary before pick_task_scx() is called. pick_task_scx() picks between the current task and the first dispatched task on the local DSQ based on availability and the core_sched_at timestamps. 
Note that FIFO ordering is expected among the already dispatched tasks whether running or on the local DSQ, so this path always compares core_sched_at instead of calling into ops.core_sched_before(). qmap_core_sched_before() is added to scx_qmap. It scales the distances from the heads of the queues to compare the tasks across different priority queues and seems to behave as expected. v3: Fixed build error when !CONFIG_SCHED_SMT reported by Andrea Righi. v2: Sched core added the const qualifiers to prio_less task arguments. Explicitly drop them for ops.core_sched_before() task arguments. BPF enforces access control through the verifier, so the qualifier isn't actually operative and only gets in the way when interacting with various helpers. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Reviewed-by: Josh Don Cc: Andrea Righi --- include/linux/sched/ext.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 21c627337e01..3db7b32b2d1d 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -129,6 +129,9 @@ struct sched_ext_entity { struct list_head runnable_node; /* rq->scx.runnable_list */ unsigned long runnable_at; +#ifdef CONFIG_SCHED_CORE + u64 core_sched_at; /* see scx_prio_less() */ +#endif u64 ddsp_dsq_id; u64 ddsp_enq_flags; -- cgit v1.2.3 From 06e51be3d5e7a07aea5c9012773df8d5de01db6c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:21 -1000 Subject: sched_ext: Add vtime-ordered priority queue to dispatch_q's Currently, a dsq is always a FIFO. A task which is dispatched earlier gets consumed or executed earlier. While this is sufficient when dsq's are used for simple staging areas for tasks which are ready to execute, it'd make dsq's a lot more useful if they can implement custom ordering. This patch adds a vtime-ordered priority queue to dsq's. When the BPF scheduler dispatches a task with the new scx_bpf_dispatch_vtime() helper, it can specify the vtime that the task should be inserted at and the task is inserted into the priority queue in the dsq which is ordered according to time_before64() comparison of the vtime values. A DSQ can either be a FIFO or priority queue and automatically switches between the two depending on whether scx_bpf_dispatch() or scx_bpf_dispatch_vtime() is used. Using the wrong variant while the DSQ already has the other type queued is not allowed and triggers an ops error. Built-in DSQs must always be FIFOs. This makes it very easy and efficient for BPF schedulers to implement proper vtime based scheduling within each dsq, at a negligible cost in terms of code complexity and overhead. scx_simple and scx_example_flatcg are updated to default to weighted vtime scheduling (the latter within each cgroup). FIFO scheduling can be selected with the -f option. v4: - As mixing priority queue and FIFO on the same DSQ sometimes led to unexpected starvations, DSQs now error out if both modes are used at the same time and the built-in DSQs are no longer allowed to be priority queues. - Explicit type struct scx_dsq_node added to contain fields needed to be linked on DSQs. This will be used to implement stateful iterator. - Tasks are now always linked on dsq->list whether the DSQ is in FIFO or PRIQ mode. This confines PRIQ related complexities to the enqueue and dequeue paths. Other paths only need to look at dsq->list. This will also ease implementing BPF iterator. - Print p->scx.dsq_flags in debug dump.
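A hedged sketch of the weighted-vtime usage described above, in the spirit of what scx_simple defaults to (SHARED_DSQ and the nice-0 weight of 100 are illustrative assumptions, not from the patch):

#include <scx/common.bpf.h>

#define SHARED_DSQ 0    /* user-created DSQ, e.g. via scx_bpf_create_dsq() in ops.init() */

/* Insert into the shared DSQ ordered by the task's vtime. */
void BPF_STRUCT_OPS(vtime_enqueue, struct task_struct *p, u64 enq_flags)
{
        scx_bpf_dispatch_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, p->scx.dsq_vtime,
                               enq_flags);
}

/* Advance the task's vtime by the consumed slice, scaled inversely by weight
 * (nice-0 weight is 100). */
void BPF_STRUCT_OPS(vtime_stopping, struct task_struct *p, bool runnable)
{
        p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
}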
v3: - SCX_TASK_DSQ_ON_PRIQ flag is moved from p->scx.flags into its own p->scx.dsq_flags. The flag is protected with the dsq lock unlike other flags in p->scx.flags. This led to flag corruption in some cases. - Add comments explaining the interaction between using consumption of p->scx.slice to determine vtime progress and yielding. v2: - p->scx.dsq_vtime was not initialized on load or across cgroup migrations leading to some tasks being stalled for extended period of time depending on how saturated the machine is. Fixed. Signed-off-by: Tejun Heo Reviewed-by: David Vernet --- include/linux/sched/ext.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 3db7b32b2d1d..9cee193dab19 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -49,12 +49,15 @@ enum scx_dsq_id_flags { }; /* - * Dispatch queue (dsq) is a simple FIFO which is used to buffer between the - * scheduler core and the BPF scheduler. See the documentation for more details. + * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered + * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to + * buffer between the scheduler core and the BPF scheduler. See the + * documentation for more details. */ struct scx_dispatch_q { raw_spinlock_t lock; struct list_head list; /* tasks in dispatch order */ + struct rb_root priq; /* used to order by p->scx.dsq_vtime */ u32 nr; u64 id; struct rhash_head hash_node; @@ -86,6 +89,11 @@ enum scx_task_state { SCX_TASK_NR_STATES, }; +/* scx_entity.dsq_flags */ +enum scx_ent_dsq_flags { + SCX_TASK_DSQ_ON_PRIQ = 1 << 0, /* task is queued on the priority queue of a dsq */ +}; + /* * Mask bits for scx_entity.kf_mask. Not all kfuncs can be called from * everywhere and the following bits track which kfunc sets are currently @@ -111,13 +119,19 @@ enum scx_kf_mask { __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; +struct scx_dsq_node { + struct list_head list; /* dispatch order */ + struct rb_node priq; /* p->scx.dsq_vtime order */ + u32 flags; /* SCX_TASK_DSQ_* flags */ +}; + /* * The following is embedded in task_struct and contains all fields necessary * for a task to be scheduled by SCX. */ struct sched_ext_entity { struct scx_dispatch_q *dsq; - struct list_head dsq_node; + struct scx_dsq_node dsq_node; /* protected by dsq lock */ u32 flags; /* protected by rq lock */ u32 weight; s32 sticky_cpu; @@ -149,6 +163,15 @@ struct sched_ext_entity { */ u64 slice; + /* + * Used to order tasks when dispatching to the vtime-ordered priority + * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime() + * but can also be modified directly by the BPF scheduler. Modifying it + * while a task is queued on a dsq may mangle the ordering and is not + * recommended. + */ + u64 dsq_vtime; + /* * If set, reject future sched_setscheduler(2) calls updating the policy * to %SCHED_EXT with -%EACCES. -- cgit v1.2.3 From fa48e8d2c7b58d242c1db3a09c14f4274e055087 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:21 -1000 Subject: sched_ext: Documentation: scheduler: Document extensible scheduler class Add Documentation/scheduler/sched-ext.rst which gives a high-level overview and pointers to the examples. v6: - Add paragraph explaining debug dump. v5: - Updated to reflect /sys/kernel interface change. Kconfig options added. v4: - README improved, reformatted in markdown and renamed to README.md. 
v3: - Added tools/sched_ext/README. - Dropped _example prefix from scheduler names. v2: - Apply minor edits suggested by Bagas. Caveats section dropped as all of them are addressed. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden Cc: Bagas Sanjaya --- include/linux/sched/ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 9cee193dab19..fe9a67ffe6b1 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* + * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst + * * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. * Copyright (c) 2022 Tejun Heo * Copyright (c) 2022 David Vernet -- cgit v1.2.3 From 8169b2097d88d99d7e4a72e20e4b549efe9eb8d7 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 3 Jul 2024 09:48:01 -0700 Subject: drm/xe/uapi: Rename xe perf layer as xe observation layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the perf layer allows capture of HW counter streams. These HW counters are generally performance related but don't have to be necessarily so. Also, the name "perf" is a carryover from i915 and is not preferred. Here we propose the name "observation" for this common layer which allows capture of different types of these counter streams. v2: Rename observability layer to observation layer (Lucas/Rodrigo) v3: Rename sysctl file to "observation_paranoid" (Jose) Fixes: 52c2e956dceb ("drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types") Fixes: fe8929bdf835 ("drm/xe/perf/uapi: Add perf_stream_paranoid sysctl") Acked-by: Lucas De Marchi Acked-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Acked-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240703164801.2561423-1-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 102 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 12eaa8532b5c..33544ef78d3e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,7 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE - * - &DRM_IOCTL_XE_PERF + * - &DRM_IOCTL_XE_OBSERVATION */ /* @@ -101,7 +101,7 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a -#define DRM_XE_PERF 0x0b +#define DRM_XE_OBSERVATION 0x0b /* Must be kept compact -- no holes */ @@ -116,7 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) -#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) +#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) /** * DOC: Xe IOCTL Extensions @@ -1376,66 +1376,67 @@ struct drm_xe_wait_user_fence { }; /** - * enum drm_xe_perf_type - Perf stream types + * enum drm_xe_observation_type - 
Observation stream types */ -enum drm_xe_perf_type { - /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ - DRM_XE_PERF_TYPE_OA, +enum drm_xe_observation_type { + /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */ + DRM_XE_OBSERVATION_TYPE_OA, }; /** - * enum drm_xe_perf_op - Perf stream ops + * enum drm_xe_observation_op - Observation stream ops */ -enum drm_xe_perf_op { - /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ - DRM_XE_PERF_OP_STREAM_OPEN, +enum drm_xe_observation_op { + /** @DRM_XE_OBSERVATION_OP_STREAM_OPEN: Open an observation stream */ + DRM_XE_OBSERVATION_OP_STREAM_OPEN, - /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ - DRM_XE_PERF_OP_ADD_CONFIG, + /** @DRM_XE_OBSERVATION_OP_ADD_CONFIG: Add observation stream config */ + DRM_XE_OBSERVATION_OP_ADD_CONFIG, - /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ - DRM_XE_PERF_OP_REMOVE_CONFIG, + /** @DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: Remove observation stream config */ + DRM_XE_OBSERVATION_OP_REMOVE_CONFIG, }; /** - * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * struct drm_xe_observation_param - Input of &DRM_XE_OBSERVATION * - * The perf layer enables multiplexing perf counter streams of multiple - * types. The actual params for a particular stream operation are supplied - * via the @param pointer (use __copy_from_user to get these params). + * The observation layer enables multiplexing observation streams of + * multiple types. The actual params for a particular stream operation are + * supplied via the @param pointer (use __copy_from_user to get these + * params). */ -struct drm_xe_perf_param { +struct drm_xe_observation_param { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ - __u64 perf_type; - /** @perf_op: Perf op, of enum @drm_xe_perf_op */ - __u64 perf_op; + /** @observation_type: observation stream type, of enum @drm_xe_observation_type */ + __u64 observation_type; + /** @observation_op: observation stream op, of enum @drm_xe_observation_op */ + __u64 observation_op; /** @param: Pointer to actual stream params */ __u64 param; }; /** - * enum drm_xe_perf_ioctls - Perf fd ioctl's + * enum drm_xe_observation_ioctls - Observation stream fd ioctl's * - * Information exchanged between userspace and kernel for perf fd ioctl's - * is stream type specific + * Information exchanged between userspace and kernel for observation fd + * ioctl's is stream type specific */ -enum drm_xe_perf_ioctls { - /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ - DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), +enum drm_xe_observation_ioctls { + /** @DRM_XE_OBSERVATION_IOCTL_ENABLE: Enable data capture for an observation stream */ + DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0), - /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ - DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + /** @DRM_XE_OBSERVATION_IOCTL_DISABLE: Disable data capture for a observation stream */ + DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1), - /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ - DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + /** @DRM_XE_OBSERVATION_IOCTL_CONFIG: Change observation stream configuration */ + DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2), - /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ - DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + /** @DRM_XE_OBSERVATION_IOCTL_STATUS: Return observation stream status */ + 
DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3), - /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ - DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), + /** @DRM_XE_OBSERVATION_IOCTL_INFO: Return observation stream info */ + DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4), }; /** @@ -1546,12 +1547,12 @@ enum drm_xe_oa_format_type { * Stream params are specified as a chain of @drm_xe_ext_set_property * struct's, with @property values from enum @drm_xe_oa_property_id and * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. - * @param field in struct @drm_xe_perf_param points to the first + * @param field in struct @drm_xe_observation_param points to the first * @drm_xe_ext_set_property struct. * - * Exactly the same mechanism is also used for stream reconfiguration using - * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of - * properties below can be specified for stream reconfiguration. + * Exactly the same mechanism is also used for stream reconfiguration using the + * @DRM_XE_OBSERVATION_IOCTL_CONFIG observation stream fd ioctl, though only a + * subset of properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 @@ -1571,11 +1572,11 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA - * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + * reports, previously added via @DRM_XE_OBSERVATION_OP_ADD_CONFIG. */ DRM_XE_OA_PROPERTY_OA_METRIC_SET, - /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: OA counter report format */ DRM_XE_OA_PROPERTY_OA_FORMAT, /* * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, @@ -1596,13 +1597,13 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA - * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + * stream in a DISABLED state (see @DRM_XE_OBSERVATION_IOCTL_ENABLE). */ DRM_XE_OA_PROPERTY_OA_DISABLED, /** * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific - * @exec_queue_id. Perf queries can be executed on this exec queue. + * @exec_queue_id. OA queries can be executed on this exec queue. */ DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, @@ -1622,7 +1623,7 @@ enum drm_xe_oa_property_id { /** * struct drm_xe_oa_config - OA metric configuration * - * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * Multiple OA configs can be added using @DRM_XE_OBSERVATION_OP_ADD_CONFIG. A * particular config can be specified when opening an OA stream using * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. */ @@ -1645,8 +1646,9 @@ struct drm_xe_oa_config { /** * struct drm_xe_oa_stream_status - OA stream status returned from - * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to - * query stream status in response to EIO errno from perf fd read(). + * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl. Userspace can + * call the ioctl to query stream status in response to EIO errno from + * observation fd read(). 
*/ struct drm_xe_oa_stream_status { /** @extensions: Pointer to the first extension struct, if any */ @@ -1665,7 +1667,7 @@ struct drm_xe_oa_stream_status { /** * struct drm_xe_oa_stream_info - OA stream info returned from - * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + * @DRM_XE_OBSERVATION_IOCTL_INFO observation stream fd ioctl */ struct drm_xe_oa_stream_info { /** @extensions: Pointer to the first extension struct, if any */ -- cgit v1.2.3 From 01e0cfc994be484ddcb9e121e353e51d8bb837c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 15:28:28 +0200 Subject: drm/xe: Use write-back caching mode for system memory on DGFX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The caching mode for buffer objects with VRAM as a possible placement was forced to write-combined, regardless of placement. However, write-combined system memory is expensive to allocate and even though it is pooled, the pool is expensive to shrink, since it involves global CPU TLB flushes. Moreover write-combined system memory from TTM is only reliably available on x86 and DGFX doesn't have an x86 restriction. So regardless of the cpu caching mode selected for a bo, internally use write-back caching mode for system memory on DGFX. Coherency is maintained, but user-space clients may perceive a difference in cpu access speeds. v2: - Update RB- and Ack tags. - Rephrase wording in xe_drm.h (Matt Roper) v3: - Really rephrase wording. Signed-off-by: Thomas Hellström Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Cc: Pallavi Mishra Cc: Matthew Auld Cc: dri-devel@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Effie Yu Cc: Matthew Brost Cc: Maarten Lankhorst Cc: Jose Souza Cc: Michal Mrozek Cc: # v6.8+ Acked-by: Matthew Auld Acked-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Acked-by: Michal Mrozek Acked-by: Effie Yu #On chat Link: https://patchwork.freedesktop.org/patch/msgid/20240705132828.27714-1-thomas.hellstrom@linux.intel.com --- include/uapi/drm/xe_drm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 33544ef78d3e..19619d4952a8 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -783,7 +783,13 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CPU_CACHING_WC 2 /** * @cpu_caching: The CPU caching mode to select for this object. If - * mmaping the object the mode selected here will also be used. + * mmaping the object the mode selected here will also be used. The + * exception is when mapping system memory (including data evicted + * to system) on discrete GPUs. The caching mode selected will + * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency + * between GPU- and CPU is guaranteed. The caching mode of + * existing CPU-mappings will be updated transparently to + * user-space clients. */ __u16 cpu_caching; /** @pad: MBZ */ -- cgit v1.2.3 From d4af01c3731ff9c6e224d7183f8226a56d72b56c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Jul 2024 14:30:55 -1000 Subject: sched_ext: Take out ->priq and ->flags from scx_dsq_node struct scx_dsq_node contains two data structure nodes to link the containing task to a DSQ and a flags field that is protected by the lock of the associated DSQ. One reason why they are grouped into a struct is to use the type independently as a cursor node when iterating tasks on a DSQ. 
However, when iterating, the cursor only needs to be linked on the FIFO list and the rb_node part ends up inflating the size of the iterator data structure unnecessarily making it potentially too expensive to place it on stack. Take ->priq and ->flags out of scx_dsq_node and put them in sched_ext_entity as ->dsq_priq and ->dsq_flags, respectively. scx_dsq_node is renamed to scx_dsq_list_node and the field names are renamed accordingly. This will help implementing DSQ task iterator that can be allocated on stack. No functional change intended. Signed-off-by: Tejun Heo Suggested-by: Alexei Starovoitov Acked-by: Alexei Starovoitov Cc: David Vernet --- include/linux/sched/ext.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index fe9a67ffe6b1..eb9cfd18a923 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -121,10 +121,8 @@ enum scx_kf_mask { __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; -struct scx_dsq_node { - struct list_head list; /* dispatch order */ - struct rb_node priq; /* p->scx.dsq_vtime order */ - u32 flags; /* SCX_TASK_DSQ_* flags */ +struct scx_dsq_list_node { + struct list_head node; }; /* @@ -133,7 +131,9 @@ struct scx_dsq_node { */ struct sched_ext_entity { struct scx_dispatch_q *dsq; - struct scx_dsq_node dsq_node; /* protected by dsq lock */ + struct scx_dsq_list_node dsq_list; /* dispatch order */ + struct rb_node dsq_priq; /* p->scx.dsq_vtime order */ + u32 dsq_flags; /* protected by DSQ lock */ u32 flags; /* protected by rq lock */ u32 weight; s32 sticky_cpu; -- cgit v1.2.3 From 650ba21b131ed1f8ee57826b2c6295a3be221132 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Jul 2024 14:30:55 -1000 Subject: sched_ext: Implement DSQ iterator DSQs are very opaque in the consumption path. The BPF scheduler has no way of knowing which tasks are being considered and which is picked. This patch adds BPF DSQ iterator. - Allows iterating tasks queued on a DSQ in the dispatch order or reverse from anywhere using bpf_for_each(scx_dsq) or calling the iterator kfuncs directly. - Has ordering guarantee where only tasks which were already queued when the iteration started are visible and consumable during the iteration. v5: - Add a comment to the naked list_empty(&dsq->list) test in consume_dispatch_q() to explain the reasoning behind the lockless test and by extension why nldsq_next_task() isn't used there. - scx_qmap changes separated into its own patch. v4: - bpf_iter_scx_dsq_new() declaration in common.bpf.h was using the wrong type for the last argument (bool rev instead of u64 flags). Fix it. v3: - Alexei pointed out that the iterator is too big to allocate on stack. Added a prep patch to reduce the size of the cursor. Now bpf_iter_scx_dsq is 48 bytes and bpf_iter_scx_dsq_kern is 40 bytes on 64bit. - u32_before() comparison factored out. v2: - scx_bpf_consume_task() is separated out into a separate patch. - DSQ seq and iter flags don't need to be u64. Use u32. 
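For illustration only (not part of this patch): a minimal sketch of how a BPF scheduler might walk a user DSQ with the new iterator from its dispatch callback. MY_DSQ_ID and myscx_dispatch are hypothetical names, and the sketch assumes the scx common BPF headers that provide BPF_STRUCT_OPS() and the bpf_for_each(scx_dsq, ...) wrapper around the iterator kfuncs.

#include <scx/common.bpf.h>	/* assumed: provides bpf_for_each(scx_dsq, ...) */

#define MY_DSQ_ID	0	/* hypothetical DSQ created earlier with scx_bpf_create_dsq() */

void BPF_STRUCT_OPS(myscx_dispatch, s32 cpu, struct task_struct *prev)
{
	struct task_struct *p;
	u64 max_vtime = 0;
	u32 nr = 0;

	/* Only tasks already queued when the iteration started are visible. */
	bpf_for_each(scx_dsq, p, MY_DSQ_ID, 0) {
		if (p->scx.dsq_vtime > max_vtime)
			max_vtime = p->scx.dsq_vtime;
		nr++;
	}

	/* nr and max_vtime can now inform which DSQ to consume from. */
}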
Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Alexei Starovoitov Cc: bpf@vger.kernel.org --- include/linux/sched/ext.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index eb9cfd18a923..593d2f4909dd 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -61,6 +61,7 @@ struct scx_dispatch_q { struct list_head list; /* tasks in dispatch order */ struct rb_root priq; /* used to order by p->scx.dsq_vtime */ u32 nr; + u32 seq; /* used by BPF iter */ u64 id; struct rhash_head hash_node; struct llist_node free_node; @@ -123,6 +124,7 @@ enum scx_kf_mask { struct scx_dsq_list_node { struct list_head node; + bool is_bpf_iter_cursor; }; /* @@ -133,6 +135,7 @@ struct sched_ext_entity { struct scx_dispatch_q *dsq; struct scx_dsq_list_node dsq_list; /* dispatch order */ struct rb_node dsq_priq; /* p->scx.dsq_vtime order */ + u32 dsq_seq; u32 dsq_flags; /* protected by DSQ lock */ u32 flags; /* protected by rq lock */ u32 weight; -- cgit v1.2.3 From f592e01664b4a57b109fcf6f6916145517f94bd7 Mon Sep 17 00:00:00 2001 From: Sebastian Wick Date: Tue, 2 Jul 2024 16:30:16 +0200 Subject: drm/drm_connector: Document Colorspace property variants The initial idea of the Colorspace prop was that this maps 1:1 to InfoFrames/SDP but KMS does not give user space enough information nor control over the output format to figure out which variants can be used for a given KMS commit. At the same time, properties like Broadcast RGB expect full range quantization range being produced by user space from the CRTC and drivers to convert to the range expected by the sink for the chosen output format, mode, InfoFrames, etc. This change documents the reality of the Colorspace property. The Default variant unfortunately is very much driver specific and not reflected by the EDID. The BT2020 variants are in active use by generic compositors which have expectations from the driver about the conversions it has to do when selecting certain output formats. Everything else is also marked as undefined. Coming up with valid behavior that makes it usable from user space and consistent with other KMS properties for those variants is left as an exercise for whoever wants to use them. v2: * Talk about "pixel operation properties" that user space configures * Mention that user space is responsible for checking the EDID for sink support * Make it clear that drivers can choose between RGB and YCbCr on their own Signed-off-by: Sebastian Wick Reviewed-by: Pekka Paalanen Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240702143017.2429975-1-sebastian.wick@redhat.com --- include/drm/drm_connector.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index c754651044d4..e3fa43291f44 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -471,14 +471,6 @@ enum drm_privacy_screen_status { * * DP definitions come from the DP v2.0 spec * HDMI definitions come from the CTA-861-H spec - * - * A note on YCC and RGB variants: - * - * Since userspace is not aware of the encoding on the wire - * (RGB or YCbCr), drivers are free to pick the appropriate - * variant, regardless of what userspace selects. E.g., if - * BT2020_RGB is selected by userspace a driver will pick - * BT2020_YCC if the encoding on the wire is YUV444 or YUV420. * * @DRM_MODE_COLORIMETRY_DEFAULT: * Driver specific behavior. 
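For illustration only (not part of this patch): a hedged sketch of how a compositor might request BT2020 output on a connector while leaving the RGB vs YCC wire-format choice to the driver, as described above. set_colorspace_bt2020() is a hypothetical name, error handling is trimmed, and only standard libdrm calls are used; the fd is assumed to have DRM_CLIENT_CAP_ATOMIC enabled.

#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

static int set_colorspace_bt2020(int fd, uint32_t connector_id)
{
	drmModeObjectProperties *props =
		drmModeObjectGetProperties(fd, connector_id, DRM_MODE_OBJECT_CONNECTOR);
	uint32_t prop_id = 0;
	uint64_t value = 0;
	int ret = -1;

	for (uint32_t i = 0; props && i < props->count_props; i++) {
		drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);

		if (prop && !strcmp(prop->name, "Colorspace")) {
			prop_id = prop->prop_id;
			/* Ask for BT2020_RGB; the driver may still put the
			 * YCC variant on the wire depending on the output. */
			for (int j = 0; j < prop->count_enums; j++)
				if (!strcmp(prop->enums[j].name, "BT2020_RGB"))
					value = prop->enums[j].value;
		}
		drmModeFreeProperty(prop);
	}
	drmModeFreeObjectProperties(props);

	if (prop_id) {
		drmModeAtomicReq *req = drmModeAtomicAlloc();

		drmModeAtomicAddProperty(req, connector_id, prop_id, value);
		ret = drmModeAtomicCommit(fd, req, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
		drmModeAtomicFree(req);
	}
	return ret;
}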
-- cgit v1.2.3 From 6be74ddd0609959b4005f88b6a4d4af678e4a71f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:31:59 +0200 Subject: drm/ttm: Allow TTM LRU list nodes of different types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to handle list unlocking while traversing the LRU list, we want the iterators not only to point to the next position of the list traversal, but to insert themselves as list nodes at that point to work around the fact that the next node might otherwise disappear from the list while the iterator is pointing to it. These list nodes need to be easily distinguishable from other list nodes so that others traversing the list can skip over them. So declare a struct ttm_lru_item, with a struct list_head member and a type enum. This will slightly increase the size of a struct ttm_resource. Changes in previous series: - Update enum ttm_lru_item_type documentation. v3: - Introduce ttm_lru_first_res_or_null() (Christian König, Thomas Hellström) v5: - Update also the TTM test code (Xe CI). Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-2-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_resource.h | 54 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 69769355139f..1511d91e290d 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -49,6 +49,43 @@ struct io_mapping; struct sg_table; struct scatterlist; +/** + * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses + */ +enum ttm_lru_item_type { + /** @TTM_LRU_RESOURCE: The resource subclass */ + TTM_LRU_RESOURCE, + /** @TTM_LRU_HITCH: The iterator hitch subclass */ + TTM_LRU_HITCH +}; + +/** + * struct ttm_lru_item - The TTM lru list node base class + * @link: The list link + * @type: The subclass type + */ +struct ttm_lru_item { + struct list_head link; + enum ttm_lru_item_type type; +}; + +/** + * ttm_lru_item_init() - initialize a struct ttm_lru_item + * @item: The item to initialize + * @type: The subclass type + */ +static inline void ttm_lru_item_init(struct ttm_lru_item *item, + enum ttm_lru_item_type type) +{ + item->type = type; + INIT_LIST_HEAD(&item->link); +} + +static inline bool ttm_lru_item_is_res(const struct ttm_lru_item *item) +{ + return item->type == TTM_LRU_RESOURCE; +} + struct ttm_resource_manager_func { /** * struct ttm_resource_manager_func member alloc @@ -217,9 +254,21 @@ struct ttm_resource { /** * @lru: Least recently used list, see &ttm_resource_manager.lru */ - struct list_head lru; + struct ttm_lru_item lru; }; +/** + * ttm_lru_item_to_res() - Downcast a struct ttm_lru_item to a struct ttm_resource + * @item: The struct ttm_lru_item to downcast + * + * Return: Pointer to the embedding struct ttm_resource + */ +static inline struct ttm_resource * +ttm_lru_item_to_res(struct ttm_lru_item *item) +{ + return container_of(item, struct ttm_resource, lru); +} + /** * struct ttm_resource_cursor * @@ -393,6 +442,9 @@ ttm_resource_manager_next(struct ttm_resource_manager *man, struct ttm_resource_cursor *cursor, struct ttm_resource *res); +struct ttm_resource * +ttm_lru_first_res_or_null(struct list_head *head); + 
/** * ttm_resource_manager_for_each_res - iterate over all resources * @man: the resource manager -- cgit v1.2.3 From 9c62fb62c9f0761eeda8f2a9517e007ff2cdbe9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:00 +0200 Subject: drm/ttm: Slightly clean up LRU list iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make the transition to using lru hitches easier, simplify the ttm_resource_manager_next() interface to only take the cursor and reuse ttm_resource_manager_next() functionality from ttm_resource_manager_first(). Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-3-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_resource.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 1511d91e290d..7d81fd5b5b83 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -272,11 +272,15 @@ ttm_lru_item_to_res(struct ttm_lru_item *item) /** * struct ttm_resource_cursor * + * @man: The resource manager currently being iterated over. + * @cur: The list head the cursor currently points to. * @priority: the current priority * * Cursor to iterate over the resources in a manager. */ struct ttm_resource_cursor { + struct ttm_resource_manager *man; + struct list_head *cur; unsigned int priority; }; @@ -438,9 +442,7 @@ struct ttm_resource * ttm_resource_manager_first(struct ttm_resource_manager *man, struct ttm_resource_cursor *cursor); struct ttm_resource * -ttm_resource_manager_next(struct ttm_resource_manager *man, - struct ttm_resource_cursor *cursor, - struct ttm_resource *res); +ttm_resource_manager_next(struct ttm_resource_cursor *cursor); struct ttm_resource * ttm_lru_first_res_or_null(struct list_head *head); @@ -455,7 +457,7 @@ ttm_lru_first_res_or_null(struct list_head *head); */ #define ttm_resource_manager_for_each_res(man, cursor, res) \ for (res = ttm_resource_manager_first(man, cursor); res; \ - res = ttm_resource_manager_next(man, cursor, res)) + res = ttm_resource_manager_next(cursor)) struct ttm_kmap_iter * ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io, -- cgit v1.2.3 From 8e9bf0fb10a79aaed37474600948cd33d14aa606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:01 +0200 Subject: drm/ttm: Use LRU hitches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have iterators insert themselves into the list they are iterating over using hitch list nodes. Since only the iterator owner can remove these list nodes from the list, it's safe to unlock the list and when continuing, use them as a starting point. Due to the way LRU bumping works in TTM, newly added items will not be missed, and bumped items will be iterated over a second time before reaching the end of the list. The exception is list with bulk move sublists. When bumping a sublist, a hitch that is part of that sublist will also be moved and we might miss items if restarting from it. This will be addressed in a later patch. Changes in previous series: - Updated ttm_resource_cursor_fini() documentation. v2: - Don't reorder ttm_resource_manager_first() and _next(). (Christian König). 
- Use list_add instead of list_move (Christian König) v3: - Split into two patches, one cleanup, one new functionality (Christian König) - use ttm_resource_cursor_fini_locked() instead of open-coding (Matthew Brost) Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-4-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_resource.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 7d81fd5b5b83..8fac781f641e 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -273,17 +273,22 @@ ttm_lru_item_to_res(struct ttm_lru_item *item) * struct ttm_resource_cursor * * @man: The resource manager currently being iterated over. - * @cur: The list head the cursor currently points to. + * @hitch: A hitch list node inserted before the next resource + * to iterate over. * @priority: the current priority * * Cursor to iterate over the resources in a manager. */ struct ttm_resource_cursor { struct ttm_resource_manager *man; - struct list_head *cur; + struct ttm_lru_item hitch; unsigned int priority; }; +void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); + +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); + /** * struct ttm_lru_bulk_move_pos * -- cgit v1.2.3 From 4c44f89c5daee9540cb7428de5d835bd00951350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:02 +0200 Subject: drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To address the problem with hitches moving when bulk move sublists are lru-bumped, register the list cursors with the ttm_lru_bulk_move structure when traversing its list, and when lru-bumping the list, move the cursor hitch to the tail. This also means it's mandatory for drivers to call ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when initializing and finalizing the bulk move structure, so add those calls to the amdgpu- and xe driver. Compared to v1 this is slightly more code but less fragile and hopefully easier to understand. Changes in previous series: - Completely rework the functionality - Avoid a NULL pointer dereference assigning manager->mem_type - Remove some leftover code causing build problems v2: - For hitch bulk tail moves, store the mem_type in the cursor instead of with the manager. v3: - Remove leftover mem_type member from change in v2. 
v6: - Add some lockdep asserts (Matthew Brost) - Avoid NULL pointer dereference (Matthew Brost) - No need to check bo->resource before dereferencing bo->bulk_move (Matthew Brost) Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Acked-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-5-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_resource.h | 56 ++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 8fac781f641e..571abb4861a6 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -269,26 +269,6 @@ ttm_lru_item_to_res(struct ttm_lru_item *item) return container_of(item, struct ttm_resource, lru); } -/** - * struct ttm_resource_cursor - * - * @man: The resource manager currently being iterated over. - * @hitch: A hitch list node inserted before the next resource - * to iterate over. - * @priority: the current priority - * - * Cursor to iterate over the resources in a manager. - */ -struct ttm_resource_cursor { - struct ttm_resource_manager *man; - struct ttm_lru_item hitch; - unsigned int priority; -}; - -void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); - -void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); - /** * struct ttm_lru_bulk_move_pos * @@ -304,8 +284,9 @@ struct ttm_lru_bulk_move_pos { /** * struct ttm_lru_bulk_move - * * @pos: first/last lru entry for resources in the each domain/priority + * @cursor_list: The list of cursors currently traversing any of + * the sublists of @pos. Protected by the ttm device's lru_lock. * * Container for the current bulk move state. Should be used with * ttm_lru_bulk_move_init() and ttm_bo_set_bulk_move(). @@ -315,8 +296,39 @@ struct ttm_lru_bulk_move_pos { */ struct ttm_lru_bulk_move { struct ttm_lru_bulk_move_pos pos[TTM_NUM_MEM_TYPES][TTM_MAX_BO_PRIORITY]; + struct list_head cursor_list; }; +/** + * struct ttm_resource_cursor + * @man: The resource manager currently being iterated over + * @hitch: A hitch list node inserted before the next resource + * to iterate over. + * @bulk_link: A list link for the list of cursors traversing the + * bulk sublist of @bulk. Protected by the ttm device's lru_lock. + * @bulk: Pointer to struct ttm_lru_bulk_move whose subrange @hitch is + * inserted to. NULL if none. Never dereference this pointer since + * the struct ttm_lru_bulk_move object pointed to might have been + * freed. The pointer is only for comparison. + * @mem_type: The memory type of the LRU list being traversed. + * This field is valid iff @bulk != NULL. + * @priority: the current priority + * + * Cursor to iterate over the resources in a manager. + */ +struct ttm_resource_cursor { + struct ttm_resource_manager *man; + struct ttm_lru_item hitch; + struct list_head bulk_link; + struct ttm_lru_bulk_move *bulk; + unsigned int mem_type; + unsigned int priority; +}; + +void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); + +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); + /** * struct ttm_kmap_iter_iomap - Specialization for a struct io_mapping + * struct sg_table backed struct ttm_resource. 
@@ -405,6 +417,8 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man) void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk); void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk); +void ttm_lru_bulk_move_fini(struct ttm_device *bdev, + struct ttm_lru_bulk_move *bulk); void ttm_resource_add_bulk_move(struct ttm_resource *res, struct ttm_buffer_object *bo); -- cgit v1.2.3 From da966b82bf3d16f89a05732c933a589ec798d3f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:03 +0200 Subject: drm/ttm: Provide a generic LRU walker helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan() but building on the restartable TTM LRU functionality. The LRU walker optionally supports locking objects as part of a ww mutex locking transaction, to mimic to some extent the current functionality in ttm. However any -EDEADLK return is converted to -ENOSPC and then to -ENOMEM before reaching the driver, so that the driver will need to backoff and possibly retry without being able to keep the ticket. v3: - Move the helper to core ttm. - Remove the drm_exec usage from it for now, it will be reintroduced later in the series. v4: - Handle the -EALREADY case if ticketlocking. v6: - Some cleanup and added code comments (Matthew Brost) - Clarified the ticketlock in the commit message (Matthew Brost) v7: - Use s64 rather than long for the target and progress (Christian König) - Update documentation to not encourage using pages as a progress measure. (Christian König) - Remove cond_resched(). (Christian König) Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v6 Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-6-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_bo.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index ef0f52f56ebc..21fa9d5964ec 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -194,6 +194,41 @@ struct ttm_operation_ctx { uint64_t bytes_moved; }; +struct ttm_lru_walk; + +/** struct ttm_lru_walk_ops - Operations for a LRU walk. */ +struct ttm_lru_walk_ops { + /** + * process_bo - Process this bo. + * @walk: struct ttm_lru_walk describing the walk. + * @bo: A locked and referenced buffer object. + * + * Return: Negative error code on error, User-defined positive value + * (typically, but not always, size of the processed bo) on success. + * On success, the returned values are summed by the walk and the + * walk exits when its target is met. + * 0 also indicates success, -EBUSY means this bo was skipped. + */ + s64 (*process_bo)(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo); +}; + +/** + * struct ttm_lru_walk - Structure describing a LRU walk. + */ +struct ttm_lru_walk { + /** @ops: Pointer to the ops structure. */ + const struct ttm_lru_walk_ops *ops; + /** @ctx: Pointer to the struct ttm_operation_ctx. */ + struct ttm_operation_ctx *ctx; + /** @ticket: The struct ww_acquire_ctx if any. */ + struct ww_acquire_ctx *ticket; + /** @tryock_only: Only use trylock for locking. 
*/ + bool trylock_only; +}; + +s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev, + struct ttm_resource_manager *man, s64 target); + /** * ttm_bo_get - reference a struct ttm_buffer_object * -- cgit v1.2.3 From 10efe34dae798c652053d4363871914c478f1475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:04 +0200 Subject: drm/ttm: Use the LRU walker helper for swapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework the TTM swapping to use the LRU walker helper. This helps fixing up the ttm_bo_swapout() interface to be consistent about not requiring any locking. For now mimic the current behaviour of using trylock only. We could be using ticket-locks here but defer that until it's deemed necessary. The TTM swapout functionality is a bit weird anyway since it alternates between memory types without exhausting TTM_PL_SYSTEM first. Intentionally keep pages as the unit of progress since changing that to bytes is an unrelated change that can be done later. v6: - Improve on error code translation in the swapout callback (Matthew Brost). v7: - Use s64 rather than long. - Remove ttm_resource_cursor_fini() since it's no longer used. - Rename ttm_resource_cursor_fini_locked() to ttm_resource_cursor_fini(). - Don't swap out pinned bos. Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v6 Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-7-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_bo.h | 5 +++-- include/drm/ttm/ttm_resource.h | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 21fa9d5964ec..eb2692de06b8 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -417,8 +417,9 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map); int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map); void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map); int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo); -int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, - gfp_t gfp_flags); +s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + struct ttm_resource_manager *man, gfp_t gfp_flags, + s64 target); void ttm_bo_pin(struct ttm_buffer_object *bo); void ttm_bo_unpin(struct ttm_buffer_object *bo); int ttm_mem_evict_first(struct ttm_device *bdev, diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 571abb4861a6..be034be56ba1 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -325,8 +325,6 @@ struct ttm_resource_cursor { unsigned int priority; }; -void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); - void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); /** -- cgit v1.2.3 From 3756310e9fe1e0182adac89cedaa98c0eea66675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:05 +0200 Subject: drm/ttm: Use the LRU walker for eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the LRU walker for eviction. This helps removing a lot of code with weird locking semantics. 
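As an aside, for illustration only (not part of these patches): a hedged sketch of a driver-side reclaim built on the walker helper introduced above. my_process_bo(), my_walk_ops and my_reclaim() are hypothetical names; only the ttm_lru_walk_ops, ttm_lru_walk and ttm_lru_walk_for_evict() shapes come from the hunks shown earlier.

/* Assumes <drm/ttm/ttm_bo.h> and <drm/ttm/ttm_resource.h>. */
static s64 my_process_bo(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
{
	if (bo->pin_count)
		return 0;		/* nothing reclaimed, keep walking */

	/* Evict or otherwise process the locked @bo here. */
	return bo->base.size;		/* progress reported back to the walk */
}

static const struct ttm_lru_walk_ops my_walk_ops = {
	.process_bo = my_process_bo,
};

static s64 my_reclaim(struct ttm_device *bdev, struct ttm_resource_manager *man,
		      struct ttm_operation_ctx *ctx, s64 target)
{
	struct ttm_lru_walk walk = {
		.ops = &my_walk_ops,
		.ctx = ctx,
		.trylock_only = true,	/* no ww ticket: only trylock objects */
	};

	return ttm_lru_walk_for_evict(&walk, bdev, man, target);
}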
The functionality is slightly changed so that when trylocked buffer objects are exhausted, we continue to interleave walks with ticket-locks while there is still progress made. The list walks are not restarted in-between evictions. Also provide a separate ttm_bo_evict_first() function for its single user. The context of that user allows sleeping dma_resv locks. v6: - Various cleanups suggested by Matthew Brost. - Fix error return code of ttm_bo_evict_first(). (Matthew Brost) - Fix an error check that was inverted. (Matthew Brost) v7: - Use s64 rather than long (Christian König) - Early ttm_resource_cursor_fini() in ttm_bo_evict_first(). - Simplify check for bo_moved in ttm_bo_evict_first(). (Christian König) - Don't evict pinned bos. Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v6 Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-8-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_bo.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index eb2692de06b8..d1a732d56259 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -422,11 +422,9 @@ s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, s64 target); void ttm_bo_pin(struct ttm_buffer_object *bo); void ttm_bo_unpin(struct ttm_buffer_object *bo); -int ttm_mem_evict_first(struct ttm_device *bdev, - struct ttm_resource_manager *man, - const struct ttm_place *place, - struct ttm_operation_ctx *ctx, - struct ww_acquire_ctx *ticket); +int ttm_bo_evict_first(struct ttm_device *bdev, + struct ttm_resource_manager *man, + struct ttm_operation_ctx *ctx); vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, struct vm_fault *vmf); vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, -- cgit v1.2.3 From 76299a557f36d624ca32500173ad7856e1ad93c0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 3 Jul 2024 00:17:21 -0500 Subject: drm: Introduce 'power saving policy' drm property The `power saving policy` DRM property is an optional property that can be added to a connector by a driver. This property is for compositors to indicate intent of policy of whether a driver can use power saving features that may compromise the experience intended by the compositor. 
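For illustration only (not part of this patch): a hedged sketch of how a driver could expose the property on a connector. my_connector_init() is a hypothetical wrapper; the creation helper, the mode_config field and the policy bits appear in the hunks below, and drm_object_attach_property() is the usual way to attach a connector property.

static int my_connector_init(struct drm_device *dev,
			     struct drm_connector *connector)
{
	int ret;

	ret = drm_mode_create_power_saving_policy_property(dev,
					DRM_MODE_POWER_SAVING_POLICY_ALL);
	if (ret)
		return ret;

	/* Start unrestricted; the compositor opts in through the property. */
	drm_object_attach_property(&connector->base,
				   dev->mode_config.power_saving_policy, 0);
	return 0;
}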
Acked-by: Leo Li Signed-off-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Link: https://patchwork.freedesktop.org/patch/msgid/20240703051722.328-2-mario.limonciello@amd.com --- include/drm/drm_connector.h | 2 ++ include/drm/drm_mode_config.h | 5 +++++ include/uapi/drm/drm_mode.h | 7 +++++++ 3 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index e3fa43291f44..5ad735253413 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -2267,6 +2267,8 @@ int drm_mode_create_dp_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces); int drm_mode_create_content_type_property(struct drm_device *dev); int drm_mode_create_suggested_offset_properties(struct drm_device *dev); +int drm_mode_create_power_saving_policy_property(struct drm_device *dev, + uint64_t supported_policies); int drm_connector_set_path_property(struct drm_connector *connector, const char *path); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index ab0f167474b1..150f9a3b649f 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -969,6 +969,11 @@ struct drm_mode_config { */ struct drm_atomic_state *suspend_state; + /** + * @power_saving_policy: bitmask for power saving policy requests. + */ + struct drm_property *power_saving_policy; + const struct drm_mode_config_helper_funcs *helper_private; }; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index d390011b89b4..880303c2ad97 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -152,6 +152,13 @@ extern "C" { #define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ #define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ +/* power saving policy options */ +#define DRM_MODE_REQUIRE_COLOR_ACCURACY BIT(0) /* Compositor requires color accuracy */ +#define DRM_MODE_REQUIRE_LOW_LATENCY BIT(1) /* Compositor requires low latency */ + +#define DRM_MODE_POWER_SAVING_POLICY_ALL (DRM_MODE_REQUIRE_COLOR_ACCURACY |\ + DRM_MODE_REQUIRE_LOW_LATENCY) + /* Dithering mode options */ #define DRM_MODE_DITHERING_OFF 0 #define DRM_MODE_DITHERING_ON 1 -- cgit v1.2.3 From 0daf44ea9dccfcdb1fe694e9ea85497b6cc3a065 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 8 Jul 2024 22:00:28 +0300 Subject: drm/dp: Add helper to dump an LTTPR PHY descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to dump the DPCD descriptor for an LTTPR PHY. This is based on [1] and [2] moving the helper to DRM core as suggested by Ville. 
[1] https://lore.kernel.org/all/20240703155937.1674856-5-imre.deak@intel.com [2] https://lore.kernel.org/all/20240703155937.1674856-6-imre.deak@intel.com Cc: dri-devel@lists.freedesktop.org Cc: Ville Syrjälä Reviewed-by: Ankit Nautiyal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-6-imre.deak@intel.com --- include/drm/display/drm_dp.h | 4 ++++ include/drm/display/drm_dp_helper.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index 0b032faa8cf2..0146ccfe9c2e 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -1523,6 +1523,10 @@ enum drm_dp_phy { #define DP_SYMBOL_ERROR_COUNT_LANE2_PHY_REPEATER1 0xf0039 /* 1.3 */ #define DP_SYMBOL_ERROR_COUNT_LANE3_PHY_REPEATER1 0xf003b /* 1.3 */ +#define DP_OUI_PHY_REPEATER1 0xf003d /* 1.3 */ +#define DP_OUI_PHY_REPEATER(dp_phy) \ + DP_LTTPR_REG(dp_phy, DP_OUI_PHY_REPEATER1) + #define __DP_FEC1_BASE 0xf0290 /* 1.4 */ #define __DP_FEC2_BASE 0xf0298 /* 1.4 */ #define DP_FEC_BASE(dp_phy) \ diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 8defcc399f42..f88d78e43443 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -656,6 +656,8 @@ struct drm_dp_desc { int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, bool is_branch); +int drm_dp_dump_lttpr_desc(struct drm_dp_aux *aux, enum drm_dp_phy dp_phy); + /** * enum drm_dp_quirk - Display Port sink/branch device specific quirks * -- cgit v1.2.3 From e8858fc07eb8386b9b6436d3e86eeb4b81e4f660 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 13 Jul 2024 23:00:27 -0700 Subject: Input: msc5000_ts - remove the driver MCS-5000 belongs to the 1st generation of Melfas chips, manufactured in 2000-2007. The driver relies on custom platform data (no DT support) and there never were any users of this driver in the mainline kernel. The commit adding the driver mentioned that the driver was tested on S3C6410 NCP board (with Samsung S3C6410 SoC) but the touchscreen device was never added to the board file. This board was removed in v6.3 in commit 743c8fbb90ca ("ARM: s3c: remove most s3c64xx board support"). Remove the driver since there are no users. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240714060029.1528662-1-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/mcs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/mcs.h b/include/linux/platform_data/mcs.h index fcc6f2a1f5c3..f3b0749f1630 100644 --- a/include/linux/platform_data/mcs.h +++ b/include/linux/platform_data/mcs.h @@ -16,10 +16,6 @@ struct mcs_platform_data { void (*poweron)(bool); void (*cfg_pin)(void); - /* touchscreen */ - unsigned int x_size; - unsigned int y_size; - /* touchkey */ const u32 *keymap; unsigned int keymap_size; -- cgit v1.2.3 From dd29eadee1e8f07bbec57dddf4befbcd3e3c0af7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 13 Jul 2024 23:00:28 -0700 Subject: Input: msc_touchkey - remove the driver MCS-5000/5080 chips belong to the 1st generation of Melfas chips, manufactured in 2000-2007. The driver relies on custom platform data (no DT support) and there never were any users of this driver in the mainline kernel. 
It is likely that the driver was (like mcs5000_ts driver) was tested on S3C6410 NCP board (with Samsung S3C6410 SoC), but the touchkey device was never added to the board file. This board was removed in v6.3 in commit 743c8fbb90ca ("ARM: s3c: remove most s3c64xx board support"). Remove the driver since there are no users. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240714060029.1528662-2-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/mcs.h | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 include/linux/platform_data/mcs.h (limited to 'include') diff --git a/include/linux/platform_data/mcs.h b/include/linux/platform_data/mcs.h deleted file mode 100644 index f3b0749f1630..000000000000 --- a/include/linux/platform_data/mcs.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009 - 2010 Samsung Electronics Co.Ltd - * Author: Joonyoung Shim - * Author: HeungJun Kim - */ - -#ifndef __LINUX_MCS_H -#define __LINUX_MCS_H - -#define MCS_KEY_MAP(v, c) ((((v) & 0xff) << 16) | ((c) & 0xffff)) -#define MCS_KEY_VAL(v) (((v) >> 16) & 0xff) -#define MCS_KEY_CODE(v) ((v) & 0xffff) - -struct mcs_platform_data { - void (*poweron)(bool); - void (*cfg_pin)(void); - - /* touchkey */ - const u32 *keymap; - unsigned int keymap_size; - unsigned int key_maxval; - bool no_autorepeat; -}; - -#endif /* __LINUX_MCS_H */ -- cgit v1.2.3 From 0c60eb0cc320fffbb8b10329d276af14f6f5e6bf Mon Sep 17 00:00:00 2001 From: Kyoungrul Kim Date: Wed, 10 Jul 2024 08:25:20 +0900 Subject: scsi: ufs: core: Check LSDBS cap when !mcq If the user sets use_mcq_mode to 0, the host will try to activate the LSDB mode unconditionally even when the LSDBS of device HCI cap is 1. This makes commands time out and causes device probing to fail. To prevent that problem, check the LSDBS cap when MCQ is not supported. Signed-off-by: Kyoungrul Kim Link: https://lore.kernel.org/r/20240709232520epcms2p8ebdb5c4fccc30a6221390566589bf122@epcms2p8 Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 + include/ufs/ufshci.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index a43b14276bc3..cac0cdb9a916 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1109,6 +1109,7 @@ struct ufs_hba { bool ext_iid_sup; bool scsi_host_added; bool mcq_sup; + bool lsdb_sup; bool mcq_enabled; struct ufshcd_res_info res[RES_MAX]; void __iomem *mcq_base; diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 38fe97971a65..9917c7743d80 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -77,6 +77,7 @@ enum { MASK_OUT_OF_ORDER_DATA_DELIVERY_SUPPORT = 0x02000000, MASK_UIC_DME_TEST_MODE_SUPPORT = 0x04000000, MASK_CRYPTO_SUPPORT = 0x10000000, + MASK_LSDB_SUPPORT = 0x20000000, MASK_MCQ_SUPPORT = 0x40000000, }; -- cgit v1.2.3 From af22bbe1f4a514c80b89a27252beef033168f4e9 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 16 Jul 2024 13:35:48 +0200 Subject: virtio: create admin queues alongside other virtqueues Admin virtqueue is just another virtqueue nothing that special about it. The current implementation treats it somehow separate though in terms of creation and deletion. Unify the admin virtqueue creation and deletion flows to be aligned with the rest of virtqueues, creating it from vp_find_vqs_*() helpers. Let the admin virtqueue to be deleted by vp_del_vqs() as the rest. 
Call vp_find_one_vq_msix() with slow_path argument being "true" to make sure that in case of limited interrupt vectors the config vector is used for admin queue. Signed-off-by: Jiri Pirko Message-Id: <20240716113552.80599-10-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ab4b9a3fef6b..169c7d367fac 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -104,8 +104,6 @@ struct virtqueue_info { * Returns 0 on success or error status * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. - * @create_avq: create admin virtqueue resource. - * @destroy_avq: destroy admin virtqueue resource. */ struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -133,8 +131,6 @@ struct virtio_config_ops { struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); int (*enable_vq_after_reset)(struct virtqueue *vq); - int (*create_avq)(struct virtio_device *vdev); - void (*destroy_avq)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ -- cgit v1.2.3 From 4c3b54af907e709609d3d8beca92d65e2f0cfd83 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 16 Jul 2024 13:35:51 +0200 Subject: virtio_pci_modern: use completion instead of busy loop to wait on admin cmd result Currently, the code waits in a busy loop on every admin virtqueue issued command to get a reply. That prevents callers from issuing multiple commands in parallel. To overcome this limitation, introduce a virtqueue event callback for admin virtqueue. For every issued command, use completion mechanism to wait on a reply. In the event callback, trigger the completion is done for every incoming reply. Alongside with that, introduce a spin lock to protect the admin virtqueue operations. Signed-off-by: Jiri Pirko Message-Id: <20240716113552.80599-13-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 96fea920873b..999ff5934392 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -10,6 +10,7 @@ #include #include #include +#include /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -109,6 +110,8 @@ struct virtio_admin_cmd { __le64 group_member_id; struct scatterlist *data_sg; struct scatterlist *result_sg; + struct completion completion; + int ret; }; /** -- cgit v1.2.3 From e1a261ba599eec97e1c5c7760d5c3698fc24e6a6 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 2 Jul 2024 14:26:04 +0200 Subject: printk: Add a short description string to kmsg_dump() kmsg_dump doesn't forward the panic reason string to the kmsg_dumper callback. This patch adds a new struct kmsg_dump_detail, that will hold the reason and description, and pass it to the dump() callback. To avoid updating all kmsg_dump() call, it adds a kmsg_dump_desc() function and a macro for backward compatibility. I've written this for drm_panic, but it can be useful for other kmsg_dumper. It allows to see the panic reason, like "sysrq triggered crash" or "VFS: Unable to mount root fs on xxxx" on the drm panic screen. v2: * Use a struct kmsg_dump_detail to hold the reason and description pointer, for more flexibility if we want to add other parameters. 
(Kees Cook) * Fix powerpc/nvram_64 build, as I didn't update the forward declaration of oops_to_nvram() Signed-off-by: Jocelyn Falempe Acked-by: Petr Mladek Acked-by: Michael Ellerman (powerpc) Acked-by: Kees Cook Link: https://patchwork.freedesktop.org/patch/msgid/20240702122639.248110-1-jfalempe@redhat.com --- include/linux/kmsg_dump.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 906521c2329c..6055fc969877 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -39,6 +39,17 @@ struct kmsg_dump_iter { u64 next_seq; }; +/** + * struct kmsg_dump_detail - kernel crash detail + * @reason: reason for the crash, see kmsg_dump_reason. + * @description: optional short string, to provide additional information. + */ + +struct kmsg_dump_detail { + enum kmsg_dump_reason reason; + const char *description; +}; + /** * struct kmsg_dumper - kernel crash message dumper structure * @list: Entry in the dumper list (private) @@ -49,13 +60,13 @@ struct kmsg_dump_iter { */ struct kmsg_dumper { struct list_head list; - void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); + void (*dump)(struct kmsg_dumper *dumper, struct kmsg_dump_detail *detail); enum kmsg_dump_reason max_reason; bool registered; }; #ifdef CONFIG_PRINTK -void kmsg_dump(enum kmsg_dump_reason reason); +void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc); bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); @@ -71,7 +82,7 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper); const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason); #else -static inline void kmsg_dump(enum kmsg_dump_reason reason) +static inline void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { } @@ -107,4 +118,9 @@ static inline const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) } #endif +static inline void kmsg_dump(enum kmsg_dump_reason reason) +{ + kmsg_dump_desc(reason, NULL); +} + #endif /* _LINUX_KMSG_DUMP_H */ -- cgit v1.2.3 From 7108b4a589cd6d3a2c1276fd610b3500f46de66a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 10 Jul 2024 15:02:27 -0700 Subject: drm/xe/uapi: Expose SIMD16 EU mask in topology query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PVC, Xe2 and later platforms have 16-wide EUs. We were implicitly reporting for PVC the number of 16-wide EUs without giving userspace any hint that they were different than for other platforms. Xe2 and later also have 16-wide, but in those cases the reported number would correspond to the 8-wide count. To avoid confusion and make sure the right number is used by userspace depending on the platform, add a new item to the topology query and drop the one that is not available. The new mask reported for both PVC and Xe2 should now match the numbers reported via hwconfig. 
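For illustration only (not part of this patch): a hedged userspace sketch that derives the per-DSS EU count from a GT topology blob already obtained through the device query ioctl. parse_eu_count() is a hypothetical name and the record layout (a num_bytes-sized mask[] following each struct) is assumed to match the existing drm_xe_query_topology_mask uAPI; a device is expected to report either the SIMD8 or the SIMD16 EU mask, not both.

#include <stdint.h>
#include <stddef.h>
#include <drm/xe_drm.h>	/* assumed install path of the xe uAPI header */

static unsigned int parse_eu_count(const void *blob, size_t len, uint16_t gt)
{
	const uint8_t *p = blob, *end = p + len;
	unsigned int eus = 0;

	while (p + sizeof(struct drm_xe_query_topology_mask) <= end) {
		const struct drm_xe_query_topology_mask *topo = (const void *)p;

		if (topo->gt_id == gt &&
		    (topo->type == DRM_XE_TOPO_EU_PER_DSS ||
		     topo->type == DRM_XE_TOPO_SIMD16_EU_PER_DSS)) {
			for (uint32_t i = 0; i < topo->num_bytes; i++)
				eus += __builtin_popcount(topo->mask[i]);
		}
		p += sizeof(*topo) + topo->num_bytes;
	}
	return eus;	/* EUs available per DSS, regardless of EU width */
}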
v2: Use a different topo item with EU type in its name to report the new mask instead of adding the type itself as the item (Matt Roper) Reviewed-by: Matt Roper Acked-by: José Roberto de Souza Acked-by: Mateusz Jablonski Acked-by: Wenbin Lu Acked-by: Effie Yu Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240710220446.2169797-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19619d4952a8..29425d7fdc77 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -517,7 +517,14 @@ struct drm_xe_query_gt_list { * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: * ``EU_PER_DSS ff ff 00 00 00 00 00 00`` - * means each DSS has 16 EU. + * means each DSS has 16 SIMD8 EUs. This type may be omitted if device + * doesn't have SIMD8 EUs. + * - %DRM_XE_TOPO_SIMD16_EU_PER_DSS - To query the mask of SIMD16 Execution + * Units (EU) available per Dual Sub Slices (DSS). For example a query + * response containing the following in mask: + * ``SIMD16_EU_PER_DSS ff ff 00 00 00 00 00 00`` + * means each DSS has 16 SIMD16 EUs. This type may be omitted if device + * doesn't have SIMD16 EUs. */ struct drm_xe_query_topology_mask { /** @gt_id: GT ID the mask is associated with */ @@ -527,6 +534,7 @@ struct drm_xe_query_topology_mask { #define DRM_XE_TOPO_DSS_COMPUTE 2 #define DRM_XE_TOPO_L3_BANK 3 #define DRM_XE_TOPO_EU_PER_DSS 4 +#define DRM_XE_TOPO_SIMD16_EU_PER_DSS 5 /** @type: type of mask */ __u16 type; -- cgit v1.2.3 From d20a9f568f99591886ec73b80ff7283486710643 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Wed, 17 Jul 2024 10:48:40 +0200 Subject: fbcon: Add an option to disable fbcon in panic This is required to avoid conflict between DRM_PANIC, and fbcon. If a drm device already handle panic with drm_panic, it should set the skip_panic field in fb_info, so that fbcon will stay quiet, and not overwrite the panic_screen. Signed-off-by: Jocelyn Falempe Reviewed-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240717090102.968152-3-jfalempe@redhat.com --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index db7d97b10964..865dad03e73e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -510,6 +510,7 @@ struct fb_info { void *par; bool skip_vt_switch; /* no VT switch on suspend/resume required */ + bool skip_panic; /* Do not write to the fb after a panic */ }; /* This will go away -- cgit v1.2.3 From a838e5dca63d1dc701e63b2b1176943c57485c45 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Mon, 15 Jul 2024 21:05:32 +0800 Subject: quota: remove unneeded return value of register_quota_format The register_quota_format always returns 0, simply remove unneeded return value. 
Link: https://patch.msgid.link/20240715130534.2112678-3-shikemeng@huaweicloud.com Signed-off-by: Kemeng Shi Reviewed-by: Joseph Qi Signed-off-by: Jan Kara --- include/linux/quota.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index 07071e64abf3..89a0d83ddad0 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -526,7 +526,7 @@ struct quota_info { const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; -int register_quota_format(struct quota_format_type *fmt); +void register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); struct quota_module_name { -- cgit v1.2.3 From d5e79eeba3086a52593b295ac4bf6eddd64d4aad Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Sat, 20 Jul 2024 15:15:42 +0800 Subject: dma-buf: heaps: Deduplicate docs and adopt common format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docs for dma_heap_get_name were incorrect, and since they were duplicated in the header they were wrong there too. The docs formatting was inconsistent so I tried to make it more consistent across functions since I'm already in here doing cleanup. Remove multiple unused includes and alphabetize. Signed-off-by: T.J. Mercier Signed-off-by: Yong Wu [Yong: Just add a comment for "priv" to mute build warning] Signed-off-by: Yunfei Dong Link: https://patchwork.freedesktop.org/patch/msgid/20240720071606.27930-5-yunfei.dong@mediatek.com Signed-off-by: Christian König --- include/linux/dma-heap.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h index 064bad725061..27d15f60950a 100644 --- a/include/linux/dma-heap.h +++ b/include/linux/dma-heap.h @@ -9,14 +9,13 @@ #ifndef _DMA_HEAPS_H #define _DMA_HEAPS_H -#include #include struct dma_heap; /** * struct dma_heap_ops - ops to operate on a given heap - * @allocate: allocate dmabuf and return struct dma_buf ptr + * @allocate: allocate dmabuf and return struct dma_buf ptr * * allocate returns dmabuf on success, ERR_PTR(-errno) on error. */ @@ -41,28 +40,10 @@ struct dma_heap_export_info { void *priv; }; -/** - * dma_heap_get_drvdata() - get per-heap driver data - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The per-heap data for the heap. - */ void *dma_heap_get_drvdata(struct dma_heap *heap); -/** - * dma_heap_get_name() - get heap name - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The char* for the heap name. - */ const char *dma_heap_get_name(struct dma_heap *heap); -/** - * dma_heap_add - adds a heap to dmabuf heaps - * @exp_info: information needed to register this heap - */ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info); #endif /* _DMA_HEAPS_H */ -- cgit v1.2.3 From 7214da0ed2220a2b9ad22aa77a5974cdd2a62799 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 14 Jul 2024 23:55:02 +0300 Subject: drm/virtio: Add DRM capset definition Define DRM native context capset in the VirtIO-GPU protocol header. 
Signed-off-by: Dmitry Osipenko Reviewed-by: Rob Clark Reviewed-by: Pierre-Eric Pelloux-Prayer Link: https://patchwork.freedesktop.org/patch/msgid/20240714205502.3409718-1-dmitry.osipenko@collabora.com --- include/uapi/linux/virtio_gpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0e21f3998108..bf2c9cabd207 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -311,6 +311,7 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL2 2 /* 3 is reserved for gfxstream */ #define VIRTIO_GPU_CAPSET_VENUS 4 +#define VIRTIO_GPU_CAPSET_DRM 6 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { -- cgit v1.2.3 From 83b501c1799a96a41e163973e88826253ffadfb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 19 Jul 2024 17:24:14 +0200 Subject: drm/scheduler: remove full_recover from drm_sched_start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was basically just another one of amdgpus hacks. The parameter allowed to restart the scheduler without turning fence signaling on again. That this is absolutely not a good idea should be obvious by now since the fences will then just sit there and never signal. While at it cleanup the code a bit. Signed-off-by: Christian König Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240722083816.99685-1-christian.koenig@amd.com --- include/drm/gpu_scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 5acc64954a88..fe8edb917360 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -579,7 +579,7 @@ bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad); -void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery); +void drm_sched_start(struct drm_gpu_scheduler *sched); void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched); void drm_sched_increase_karma(struct drm_sched_job *bad); void drm_sched_reset_karma(struct drm_sched_job *bad); -- cgit v1.2.3 From e06b71b2313a00579ba64a1cc43ad29d64cb8d4c Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 21 May 2024 13:22:15 -0400 Subject: drm/amdkfd: allow users to target recommended SDMA engines Certain GPUs have better copy performance over xGMI on specific SDMA engines depending on the source and destination GPU. Allow users to create SDMA queues on these recommended engines. Close to 2x overall performance has been observed with this optimization. 
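A hypothetical userspace sketch of the new uapi from the hunk below: kfd_fd, gpu_id and recommended_engine are placeholders, most mandatory queue fields are elided, and however the recommended engine is discovered, it is passed through the new sdma_engine_id field.

#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Illustration only: request an SDMA queue on a specific, recommended engine. */
static int create_sdma_queue_on(int kfd_fd, __u32 gpu_id, __u32 recommended_engine)
{
	struct kfd_ioctl_create_queue_args args = {0};

	args.gpu_id = gpu_id;
	args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID;
	args.sdma_engine_id = recommended_engine;
	args.queue_percentage = KFD_MAX_QUEUE_PERCENTAGE;
	args.queue_priority = KFD_MAX_QUEUE_PRIORITY / 2;
	/* ring_base_address, ring_size, read/write pointer addresses, ... */

	return ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
}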
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 285a36601dc9..71a7ce5f2d4c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -42,9 +42,10 @@ * - 1.14 - Update kfd_event_data * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag + * - 1.17 - Add SDMA queue creation with target SDMA engine ID */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 16 +#define KFD_IOCTL_MINOR_VERSION 17 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -56,6 +57,7 @@ struct kfd_ioctl_get_version_args { #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 #define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -78,6 +80,8 @@ struct kfd_ioctl_create_queue_args { __u64 ctx_save_restore_address; /* to KFD */ __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ + __u32 sdma_engine_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_destroy_queue_args { -- cgit v1.2.3 From 564429a6bd8d26065b2cccffcaa9485359f74de7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2024 18:27:47 -0400 Subject: KVM: rename CONFIG_HAVE_KVM_GMEM_* to CONFIG_HAVE_KVM_ARCH_GMEM_* Add "ARCH" to the symbols; shortly, the "prepare" phase will include both the arch-independent step to clear out contents left in the page by the host, and the arch-dependent step enabled by CONFIG_HAVE_KVM_GMEM_PREPARE. For consistency do the same for CONFIG_HAVE_KVM_GMEM_INVALIDATE as well. Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 689e8be873a7..344d90771844 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2445,7 +2445,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, } #endif /* CONFIG_KVM_PRIVATE_MEM */ -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif @@ -2477,7 +2477,7 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); #endif -- cgit v1.2.3 From 7239ed74677af143857d1a96d402476446a0995a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2024 18:27:51 -0400 Subject: KVM: remove kvm_arch_gmem_prepare_needed() It is enough to return 0 if a guest need not do any preparation. This is in fact how sev_gmem_prepare() works for non-SNP guests, and it extends naturally to Intel hosts: the x86 callback for gmem_prepare is optional and returns 0 if not defined. 
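Illustrative only, with invented helper names: after this change an architecture expresses "nothing to prepare" by returning 0 from the hook itself, so no separate predicate is needed.

#include <linux/kvm_host.h>

int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
{
	/* guest_needs_gmem_prepare() and arch_prepare_private_page() are
	 * placeholders, not real kernel symbols. */
	if (!guest_needs_gmem_prepare(kvm))	/* e.g. a non-SNP guest */
		return 0;

	return arch_prepare_private_page(kvm, gfn, pfn, max_order);
}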
Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 344d90771844..45373d42f314 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2447,7 +2447,6 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif /** -- cgit v1.2.3 From 4b5f67120a88c713b82907d55a767693382e9e9d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2024 18:27:54 -0400 Subject: KVM: extend kvm_range_has_memory_attributes() to check subset of attributes While currently there is no other attribute than KVM_MEMORY_ATTRIBUTE_PRIVATE, KVM code such as kvm_mem_is_private() is written to expect their existence. Allow using kvm_range_has_memory_attributes() as a multi-page version of kvm_mem_is_private(), without it breaking later when more attributes are introduced. Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 45373d42f314..c223b97df03e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2414,7 +2414,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn } bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs); + unsigned long mask, unsigned long attrs); bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, -- cgit v1.2.3 From e4ee5447927377c55777b73fe497a2455a25f948 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2024 18:27:55 -0400 Subject: KVM: guest_memfd: let kvm_gmem_populate() operate only on private gfns This check is currently performed by sev_gmem_post_populate(), but it applies to all callers of kvm_gmem_populate(): the point of the function is that the memory is being encrypted and some work has to be done on all the gfns in order to encrypt them. Therefore, check the KVM_MEMORY_ATTRIBUTE_PRIVATE attribute prior to invoking the callback, and stop the operation if a shared page is encountered. Because CONFIG_KVM_PRIVATE_MEM in principle does not require attributes, this makes kvm_gmem_populate() depend on CONFIG_KVM_GENERIC_PRIVATE_MEM (which does require them). 
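A sketch of the gate described above, not the exact kernel code: using the mask/attrs form introduced in the previous patch, population is refused unless the gfns are currently marked private (the real code can also stop part-way when it hits a shared page).

#include <linux/kvm_host.h>

/* Sketch only: the kind of check kvm_gmem_populate() performs before
 * invoking post_populate() on a range of gfns. */
static bool gmem_range_is_private(struct kvm *kvm, gfn_t gfn, long npages)
{
	return kvm_range_has_memory_attributes(kvm, gfn, gfn + npages,
					       KVM_MEMORY_ATTRIBUTE_PRIVATE,
					       KVM_MEMORY_ATTRIBUTE_PRIVATE);
}

If this returns false, the populate loop bails out with an error instead of encrypting a shared page.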
Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c223b97df03e..79a6b1a63027 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2449,6 +2449,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); #endif +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * @@ -2475,6 +2476,7 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); +#endif #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); -- cgit v1.2.3 From 0d5040e406d2c4404d26b841c4aa34cec0bf1088 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 25 Jul 2024 16:51:08 -0400 Subject: drm/vblank: add dynamic per-crtc vblank configuration support We would like to be able to enable vblank_disable_immediate unconditionally, however there are a handful of cases where a small off delay is necessary (e.g. with PSR enabled). So, we would like to be able to adjust the vblank off delay and disable imminent values dynamically for a given CRTC. Since, it will allow drivers to apply static screen optimizations more quickly and consequently allow users to benefit more so from the power savings afforded by the aforementioned optimizations, while avoiding issues in cases where an off delay is still warranted. In particular, the PSR case requires a small off delay of 2 frames, otherwise display firmware isn't able to keep up with all of the requests made to amdgpu. So, introduce drm_crtc_vblank_on_config() which is like drm_crtc_vblank_on(), but it allows drivers to specify the vblank CRTC configuration before enabling vblanking support for a given CRTC. Signed-off-by: Hamza Mahfooz Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240725205109.209743-1-hamza.mahfooz@amd.com --- include/drm/drm_device.h | 5 +++-- include/drm/drm_vblank.h | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 63767cf24371..c91f87b5242d 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -213,8 +213,9 @@ struct drm_device { * This can be set to true it the hardware has a working vblank counter * with high-precision timestamping (otherwise there are races) and the * driver uses drm_crtc_vblank_on() and drm_crtc_vblank_off() - * appropriately. See also @max_vblank_count and - * &drm_crtc_funcs.get_vblank_counter. + * appropriately. Also, see @max_vblank_count, + * &drm_crtc_funcs.get_vblank_counter and + * &drm_vblank_crtc_config.disable_immediate. */ bool vblank_disable_immediate; diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h index c8f829b4307c..151ab1e85b1b 100644 --- a/include/drm/drm_vblank.h +++ b/include/drm/drm_vblank.h @@ -78,6 +78,31 @@ struct drm_pending_vblank_event { } event; }; +/** + * struct drm_vblank_crtc_config - vblank configuration for a CRTC + */ +struct drm_vblank_crtc_config { + /** + * @offdelay_ms: Vblank off delay in ms, used to determine how long + * &drm_vblank_crtc.disable_timer waits before disabling. 
+ * + * Defaults to the value of drm_vblank_offdelay in drm_crtc_vblank_on(). + */ + int offdelay_ms; + + /** + * @disable_immediate: See &drm_device.vblank_disable_immediate + * for the exact semantics of immediate vblank disabling. + * + * Additionally, this tracks the disable immediate value per crtc, just + * in case it needs to differ from the default value for a given device. + * + * Defaults to the value of &drm_device.vblank_disable_immediate in + * drm_crtc_vblank_on(). + */ + bool disable_immediate; +}; + /** * struct drm_vblank_crtc - vblank tracking for a CRTC * @@ -99,8 +124,8 @@ struct drm_vblank_crtc { wait_queue_head_t queue; /** * @disable_timer: Disable timer for the delayed vblank disabling - * hysteresis logic. Vblank disabling is controlled through the - * drm_vblank_offdelay module option and the setting of the + * hysteresis logic. Vblank disabling is controlled through + * &drm_vblank_crtc_config.offdelay_ms and the setting of the * &drm_device.max_vblank_count value. */ struct timer_list disable_timer; @@ -198,6 +223,12 @@ struct drm_vblank_crtc { */ struct drm_display_mode hwmode; + /** + * @config: Stores vblank configuration values for a given CRTC. + * Also, see drm_crtc_vblank_on_config(). + */ + struct drm_vblank_crtc_config config; + /** * @enabled: Tracks the enabling state of the corresponding &drm_crtc to * avoid double-disabling and hence corrupting saved state. Needed by @@ -247,6 +278,8 @@ void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe); void drm_crtc_wait_one_vblank(struct drm_crtc *crtc); void drm_crtc_vblank_off(struct drm_crtc *crtc); void drm_crtc_vblank_reset(struct drm_crtc *crtc); +void drm_crtc_vblank_on_config(struct drm_crtc *crtc, + const struct drm_vblank_crtc_config *config); void drm_crtc_vblank_on(struct drm_crtc *crtc); u64 drm_crtc_accurate_vblank_count(struct drm_crtc *crtc); void drm_crtc_vblank_restore(struct drm_crtc *crtc); -- cgit v1.2.3 From 23a55f4492fcf868d068da31a2cd30c15f46207d Mon Sep 17 00:00:00 2001 From: Ofir Gal Date: Thu, 18 Jul 2024 11:45:12 +0300 Subject: net: introduce helper sendpages_ok() Network drivers are using sendpage_ok() to check the first page of an iterator in order to disable MSG_SPLICE_PAGES. The iterator can represent list of contiguous pages. When MSG_SPLICE_PAGES is enabled skb_splice_from_iter() is being used, it requires all pages in the iterator to be sendable. Therefore it needs to check that each page is sendable. The patch introduces a helper sendpages_ok(), it returns true if all the contiguous pages are sendable. Drivers who want to send contiguous pages with MSG_SPLICE_PAGES may use this helper to check whether the page list is OK. If the helper does not return true, the driver should remove MSG_SPLICE_PAGES flag. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Ofir Gal Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240718084515.3833733-2-ofir.gal@volumez.com Signed-off-by: Jens Axboe --- include/linux/net.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 688320b79fcc..b75bc534c1b3 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -322,6 +322,25 @@ static inline bool sendpage_ok(struct page *page) return !PageSlab(page) && page_count(page) >= 1; } +/* + * Check sendpage_ok on contiguous pages. 
+ */ +static inline bool sendpages_ok(struct page *page, size_t len, size_t offset) +{ + struct page *p = page + (offset >> PAGE_SHIFT); + size_t count = 0; + + while (count < len) { + if (!sendpage_ok(p)) + return false; + + p++; + count += PAGE_SIZE; + } + + return true; +} + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, -- cgit v1.2.3 From 7543ae2269a855683a39af57048035f44bc8ef9c Mon Sep 17 00:00:00 2001 From: Wouter Verhelst Date: Thu, 25 Jul 2024 18:45:36 +0200 Subject: nbd: add support for rotational devices The NBD protocol defines the flag NBD_FLAG_ROTATIONAL to flag that the export in use should be treated as a rotational device. Add support for that flag to the kernel driver. Signed-off-by: Wouter Verhelst Reviewed-by: Eric Blake Link: https://lore.kernel.org/r/20240725164536.1275851-1-w@uter.be Signed-off-by: Jens Axboe --- include/uapi/linux/nbd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 80ce0ef43afd..d75215f2c675 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -51,8 +51,9 @@ enum { #define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ #define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ #define NBD_FLAG_SEND_FUA (1 << 3) /* send FUA (forced unit access) */ -/* there is a gap here to match userspace */ +#define NBD_FLAG_ROTATIONAL (1 << 4) /* device is rotational */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ +/* there is a gap here to match userspace */ #define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */ /* values for cmd flags in the upper 16 bits of request type */ -- cgit v1.2.3 From 1a251f52cfdc417c84411a056bc142cbd77baef4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Jul 2024 15:49:18 -0700 Subject: minmax: make generic MIN() and MAX() macros available everywhere This just standardizes the use of MIN() and MAX() macros, with the very traditional semantics. The goal is to use these for C constant expressions and for top-level / static initializers, and so be able to simplify the min()/max() macros. These macro names were used by various kernel code - they are very traditional, after all - and all such users have been fixed up, with a few different approaches: - trivial duplicated macro definitions have been removed Note that 'trivial' here means that it's obviously kernel code that already included all the major kernel headers, and thus gets the new generic MIN/MAX macros automatically. - non-trivial duplicated macro definitions are guarded with #ifndef This is the "yes, they define their own versions, but no, the include situation is not entirely obvious, and maybe they don't get the generic version automatically" case. - strange use case #1 A couple of drivers decided that the way they want to describe their versioning is with #define MAJ 1 #define MIN 2 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) which adds zero value and I just did my Alexander the Great impersonation, and rewrote that pointless Gordian knot as #define DRV_VERSION "1.2" instead. - strange use case #2 A couple of drivers thought that it's a good idea to have a random 'MIN' or 'MAX' define for a value or index into a table, rather than the traditional macro that takes arguments. These values were re-written as C enum's instead. 
The new function-like macros only expand when followed by an open parenthesis, and thus don't clash with enum use. Happily, there weren't really all that many of these cases, and a lot of users already had the pattern of using '#ifndef' guarding (or in one case just using '#undef MIN') before defining their own private version that does the same thing. I left such cases alone. Cc: David Laight Cc: Lorenzo Stoakes Signed-off-by: Linus Torvalds --- include/linux/minmax.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 9c2848abc804..fc384714da45 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -277,6 +277,8 @@ static inline bool in_range32(u32 val, u32 start, u32 len) * Use these carefully: no type checking, and uses the arguments * multiple times. Use for obvious constants only. */ +#define MIN(a,b) __cmp(min,a,b) +#define MAX(a,b) __cmp(max,a,b) #define MIN_T(type,a,b) __cmp(min,(type)(a),(type)(b)) #define MAX_T(type,a,b) __cmp(max,(type)(a),(type)(b)) -- cgit v1.2.3 From aaa5e1aa39074fb466f6ef3df4de6903741dfeec Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 26 Jul 2024 17:52:36 +0200 Subject: ASoC: Use __counted_by() annotation for snd_soc_pcm_runtime The struct snd_soc_pcm_runtime has a flex array of snd_soc_component objects at its end, and the size is kept in the num_components field. We can add the __counted_by() annotation for compiler's assistance to catch array overflows. A slight additional change is the assignment of rtd->components[]; the array counter has to be incremented first to avoid false-positive reports from compilers. Also, the allocation size of snd_soc_pcm_runtime is cleaned up with the standard struct_size() helper, too. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240726155237.21961-1-tiwai@suse.de Signed-off-by: Mark Brown --- include/sound/soc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index a8e66bbf932b..e844f6afc5b5 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1209,8 +1209,9 @@ struct snd_soc_pcm_runtime { bool initialized; + /* CPU/Codec/Platform */ int num_components; - struct snd_soc_component *components[]; /* CPU/Codec/Platform */ + struct snd_soc_component *components[] __counted_by(num_components); }; /* see soc_new_pcm_runtime() */ -- cgit v1.2.3 From f58872f45c36ded048bccc22701b0986019c24d8 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Fri, 12 Jul 2024 16:20:36 -0300 Subject: spi: Enable controllers to extend the SPI protocol with MOSI idle configuration The behavior of an SPI controller data output line (SDO or MOSI or COPI (Controller Output Peripheral Input) for disambiguation) is usually not specified when the controller is not clocking out data. However, there do exist SPI peripherals that require a specific MOSI line state when data is not being clocked out of the controller. Conventional SPI controllers may set the MOSI line on SCLK edges then bring it low when no data is going out, or leave the line at the state of the last transfer bit. More elaborate controllers are capable of setting the MOSI idle state according to different configurable levels and thus are more suitable for interfacing with demanding peripherals. Add SPI mode bits to allow peripherals to request an explicit MOSI idle state when needed.
When supporting a particular MOSI idle configuration, the data output line state is expected to remain at the configured level when the controller is not clocking out data. When a device that needs a specific MOSI idle state is identified, its driver should request the MOSI idle configuration by setting the proper SPI mode bit. Acked-by: Nuno Sa Reviewed-by: Jonathan Cameron Reviewed-by: David Lechner Tested-by: David Lechner Signed-off-by: Marcelo Schmitt Link: https://patch.msgid.link/9802160b5e5baed7f83ee43ac819cb757a19be55.1720810545.git.marcelo.schmitt@analog.com Signed-off-by: Mark Brown --- include/uapi/linux/spi/spi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/spi/spi.h b/include/uapi/linux/spi/spi.h index ca56e477d161..ee4ac812b8f8 100644 --- a/include/uapi/linux/spi/spi.h +++ b/include/uapi/linux/spi/spi.h @@ -28,7 +28,8 @@ #define SPI_RX_OCTAL _BITUL(14) /* receive with 8 wires */ #define SPI_3WIRE_HIZ _BITUL(15) /* high impedance turnaround */ #define SPI_RX_CPHA_FLIP _BITUL(16) /* flip CPHA on Rx only xfer */ -#define SPI_MOSI_IDLE_LOW _BITUL(17) /* leave mosi line low when idle */ +#define SPI_MOSI_IDLE_LOW _BITUL(17) /* leave MOSI line low when idle */ +#define SPI_MOSI_IDLE_HIGH _BITUL(18) /* leave MOSI line high when idle */ /* * All the bits defined above should be covered by SPI_MODE_USER_MASK. @@ -38,6 +39,6 @@ * These bits must not overlap. A static assert check should make sure of that. * If adding extra bits, make sure to increase the bit index below as well. */ -#define SPI_MODE_USER_MASK (_BITUL(18) - 1) +#define SPI_MODE_USER_MASK (_BITUL(19) - 1) #endif /* _UAPI_SPI_H */ -- cgit v1.2.3 From 320f6693097bf89d67f9cabad24a2b911e23073f Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Fri, 12 Jul 2024 16:21:03 -0300 Subject: spi: bitbang: Implement support for MOSI idle state configuration Some SPI peripherals may require strict MOSI line state when the controller is not clocking out data. Implement support for MOSI idle state configuration (low or high) by setting the data output line level on controller setup and after transfers. Bitbang operations now call controller specific set_mosi_idle() callback to set MOSI to its idle state. The MOSI line is kept at its idle state if no tx buffer is provided. Acked-by: Nuno Sa Reviewed-by: David Lechner Reviewed-by: Jonathan Cameron Signed-off-by: Marcelo Schmitt Link: https://patch.msgid.link/de61a600b56ed9cb714d5ea87afa88948e70041e.1720810545.git.marcelo.schmitt@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi_bitbang.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index d4cb83195f7a..c92cd43a47f4 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -24,6 +24,7 @@ struct spi_bitbang { #define BITBANG_CS_ACTIVE 1 /* normally nCS, active low */ #define BITBANG_CS_INACTIVE 0 + void (*set_mosi_idle)(struct spi_device *spi); /* txrx_bufs() may handle dma mapping for transfers that don't * already have one (transfer.{tx,rx}_dma is zero), or use PIO */ -- cgit v1.2.3 From d65d411c9259a2499081e1e7ed91088232666b57 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 25 Jul 2023 12:08:50 +0100 Subject: treewide: context_tracking: Rename CONTEXT_* into CT_STATE_* Context tracking state related symbols currently use a mix of the CONTEXT_ (e.g. CONTEXT_KERNEL) and CT_SATE_ (e.g. CT_STATE_MASK) prefixes. 
Clean up the naming and make the ctx_state enum use the CT_STATE_ prefix. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Acked-by: Frederic Weisbecker Acked-by: Thomas Gleixner Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking.h | 16 ++++++++-------- include/linux/context_tracking_state.h | 20 ++++++++++---------- include/linux/entry-common.h | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 6e76b9dba00e..28fcfa184903 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -26,26 +26,26 @@ extern void user_exit_callable(void); static inline void user_enter(void) { if (context_tracking_enabled()) - ct_user_enter(CONTEXT_USER); + ct_user_enter(CT_STATE_USER); } static inline void user_exit(void) { if (context_tracking_enabled()) - ct_user_exit(CONTEXT_USER); + ct_user_exit(CT_STATE_USER); } /* Called with interrupts disabled. */ static __always_inline void user_enter_irqoff(void) { if (context_tracking_enabled()) - __ct_user_enter(CONTEXT_USER); + __ct_user_enter(CT_STATE_USER); } static __always_inline void user_exit_irqoff(void) { if (context_tracking_enabled()) - __ct_user_exit(CONTEXT_USER); + __ct_user_exit(CT_STATE_USER); } static inline enum ctx_state exception_enter(void) @@ -57,7 +57,7 @@ static inline enum ctx_state exception_enter(void) return 0; prev_ctx = __ct_state(); - if (prev_ctx != CONTEXT_KERNEL) + if (prev_ctx != CT_STATE_KERNEL) ct_user_exit(prev_ctx); return prev_ctx; @@ -67,7 +67,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) { if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) && context_tracking_enabled()) { - if (prev_ctx != CONTEXT_KERNEL) + if (prev_ctx != CT_STATE_KERNEL) ct_user_enter(prev_ctx); } } @@ -75,7 +75,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) static __always_inline bool context_tracking_guest_enter(void) { if (context_tracking_enabled()) - __ct_user_enter(CONTEXT_GUEST); + __ct_user_enter(CT_STATE_GUEST); return context_tracking_enabled_this_cpu(); } @@ -83,7 +83,7 @@ static __always_inline bool context_tracking_guest_enter(void) static __always_inline void context_tracking_guest_exit(void) { if (context_tracking_enabled()) - __ct_user_exit(CONTEXT_GUEST); + __ct_user_exit(CT_STATE_GUEST); } #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index bbff5f7f8803..f1c53125edee 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -10,18 +10,18 @@ #define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) enum ctx_state { - CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ - CONTEXT_KERNEL = 0, - CONTEXT_IDLE = 1, - CONTEXT_USER = 2, - CONTEXT_GUEST = 3, - CONTEXT_MAX = 4, + CT_STATE_DISABLED = -1, /* returned by ct_state() if unknown */ + CT_STATE_KERNEL = 0, + CT_STATE_IDLE = 1, + CT_STATE_USER = 2, + CT_STATE_GUEST = 3, + CT_STATE_MAX = 4, }; /* Even value for idle, else odd. 
*/ -#define RCU_DYNTICKS_IDX CONTEXT_MAX +#define RCU_DYNTICKS_IDX CT_STATE_MAX -#define CT_STATE_MASK (CONTEXT_MAX - 1) +#define CT_STATE_MASK (CT_STATE_MAX - 1) #define CT_DYNTICKS_MASK (~CT_STATE_MASK) struct context_tracking { @@ -123,14 +123,14 @@ static inline bool context_tracking_enabled_this_cpu(void) * * Returns the current cpu's context tracking state if context tracking * is enabled. If context tracking is disabled, returns - * CONTEXT_DISABLED. This should be used primarily for debugging. + * CT_STATE_DISABLED. This should be used primarily for debugging. */ static __always_inline int ct_state(void) { int ret; if (!context_tracking_enabled()) - return CONTEXT_DISABLED; + return CT_STATE_DISABLED; preempt_disable(); ret = __ct_state(); diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index b0fb775a600d..1e50cdb83ae5 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -108,7 +108,7 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs) arch_enter_from_user_mode(regs); lockdep_hardirqs_off(CALLER_ADDR0); - CT_WARN_ON(__ct_state() != CONTEXT_USER); + CT_WARN_ON(__ct_state() != CT_STATE_USER); user_exit_irqoff(); instrumentation_begin(); -- cgit v1.2.3 From 4aa35e0db6d758e8f952ed30d7ac3117c376562c Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 25 Jul 2023 12:19:01 +0100 Subject: context_tracking, rcu: Rename RCU_DYNTICKS_IDX into CT_RCU_WATCHING The symbols relating to the CT_STATE part of context_tracking.state are now all prefixed with CT_STATE. The RCU dynticks counter part of that atomic variable still involves symbols with different prefixes, align them all to be prefixed with CT_RCU_WATCHING. Suggested-by: "Paul E. McKenney" Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Acked-by: Thomas Gleixner Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking.h | 6 +++--- include/linux/context_tracking_state.h | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 28fcfa184903..a6c36780cc3b 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -119,7 +119,7 @@ extern void ct_idle_exit(void); */ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { - return !(raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & RCU_DYNTICKS_IDX); + return !(raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING); } /* @@ -142,7 +142,7 @@ static __always_inline bool warn_rcu_enter(void) preempt_disable_notrace(); if (rcu_dynticks_curr_cpu_in_eqs()) { ret = true; - ct_state_inc(RCU_DYNTICKS_IDX); + ct_state_inc(CT_RCU_WATCHING); } return ret; @@ -151,7 +151,7 @@ static __always_inline bool warn_rcu_enter(void) static __always_inline void warn_rcu_exit(bool rcu) { if (rcu) - ct_state_inc(RCU_DYNTICKS_IDX); + ct_state_inc(CT_RCU_WATCHING); preempt_enable_notrace(); } diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index f1c53125edee..94d6a935af3b 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -18,11 +18,11 @@ enum ctx_state { CT_STATE_MAX = 4, }; -/* Even value for idle, else odd. */ -#define RCU_DYNTICKS_IDX CT_STATE_MAX +/* Odd value for watching, else even. 
*/ +#define CT_RCU_WATCHING CT_STATE_MAX #define CT_STATE_MASK (CT_STATE_MAX - 1) -#define CT_DYNTICKS_MASK (~CT_STATE_MASK) +#define CT_RCU_WATCHING_MASK (~CT_STATE_MASK) struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING_USER @@ -58,21 +58,21 @@ static __always_inline int __ct_state(void) #ifdef CONFIG_CONTEXT_TRACKING_IDLE static __always_inline int ct_dynticks(void) { - return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_DYNTICKS_MASK; + return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING_MASK; } static __always_inline int ct_dynticks_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return atomic_read(&ct->state) & CT_DYNTICKS_MASK; + return atomic_read(&ct->state) & CT_RCU_WATCHING_MASK; } static __always_inline int ct_dynticks_cpu_acquire(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return atomic_read_acquire(&ct->state) & CT_DYNTICKS_MASK; + return atomic_read_acquire(&ct->state) & CT_RCU_WATCHING_MASK; } static __always_inline long ct_dynticks_nesting(void) -- cgit v1.2.3 From a4a7921ec08d016f9d692752e2f82f66369c7ffa Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 10:45:56 +0200 Subject: context_tracking, rcu: Rename ct_dynticks() into ct_rcu_watching() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 94d6a935af3b..cb90d8c17810 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -56,7 +56,7 @@ static __always_inline int __ct_state(void) #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE -static __always_inline int ct_dynticks(void) +static __always_inline int ct_rcu_watching(void) { return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING_MASK; } -- cgit v1.2.3 From a9fde9d1a5dd42edadf61cf4e64aa234b4c5bd3f Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 10:47:14 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_cpu() into ct_rcu_watching_cpu() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. 
Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index cb90d8c17810..ad5a06a42b4a 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -61,7 +61,7 @@ static __always_inline int ct_rcu_watching(void) return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING_MASK; } -static __always_inline int ct_dynticks_cpu(int cpu) +static __always_inline int ct_rcu_watching_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); -- cgit v1.2.3 From 125716c393889f9035567d4d78da239483f63ece Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:47:12 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_cpu_acquire() into ct_rcu_watching_cpu_acquire() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index ad5a06a42b4a..ad6570ffeff3 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -68,7 +68,7 @@ static __always_inline int ct_rcu_watching_cpu(int cpu) return atomic_read(&ct->state) & CT_RCU_WATCHING_MASK; } -static __always_inline int ct_dynticks_cpu_acquire(int cpu) +static __always_inline int ct_rcu_watching_cpu_acquire(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); -- cgit v1.2.3 From 7aeba709a048d870c15940af8b620b16281c3b9e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 30 May 2024 15:45:42 +0200 Subject: rcu/nocb: Introduce RCU_NOCB_LOCKDEP_WARN() Checking for races against concurrent (de-)offloading implies the creation of !CONFIG_RCU_NOCB_CPU stubs to check if each relevant lock is held. For now this only implies the nocb_lock but more are to be expected. Create instead a NOCB specific version of RCU_LOCKDEP_WARN() to avoid the proliferation of stubs. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Paul E. 
McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcupdate.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13f6f00aecf9..d48d3c237305 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -144,11 +144,18 @@ void rcu_init_nohz(void); int rcu_nocb_cpu_offload(int cpu); int rcu_nocb_cpu_deoffload(int cpu); void rcu_nocb_flush_deferred_wakeup(void); + +#define RCU_NOCB_LOCKDEP_WARN(c, s) RCU_LOCKDEP_WARN(c, s) + #else /* #ifdef CONFIG_RCU_NOCB_CPU */ + static inline void rcu_init_nohz(void) { } static inline int rcu_nocb_cpu_offload(int cpu) { return -EINVAL; } static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; } static inline void rcu_nocb_flush_deferred_wakeup(void) { } + +#define RCU_NOCB_LOCKDEP_WARN(c, s) + #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /* -- cgit v1.2.3 From bae6076ebbd14cc1f1bd4de65c2db21d3ab109d7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 30 May 2024 15:45:50 +0200 Subject: rcu/nocb: Remove SEGCBLIST_RCU_CORE RCU core can't be running anymore while in the middle of (de-)offloading since this sort of transition now only applies to offline CPUs. The SEGCBLIST_RCU_CORE state can therefore be removed. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcu_segcblist.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index ba95c06675e1..5469c54cd778 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -185,11 +185,10 @@ struct rcu_cblist { * ---------------------------------------------------------------------------- */ #define SEGCBLIST_ENABLED BIT(0) -#define SEGCBLIST_RCU_CORE BIT(1) -#define SEGCBLIST_LOCKING BIT(2) -#define SEGCBLIST_KTHREAD_CB BIT(3) -#define SEGCBLIST_KTHREAD_GP BIT(4) -#define SEGCBLIST_OFFLOADED BIT(5) +#define SEGCBLIST_LOCKING BIT(1) +#define SEGCBLIST_KTHREAD_CB BIT(2) +#define SEGCBLIST_KTHREAD_GP BIT(3) +#define SEGCBLIST_OFFLOADED BIT(4) struct rcu_segcblist { struct rcu_head *head; -- cgit v1.2.3 From 91e43b9044a4f9b6976dc28b3f1446b733cc68de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 30 May 2024 15:45:51 +0200 Subject: rcu/nocb: Remove SEGCBLIST_KTHREAD_CB This state excerpt from the (de-)offloading state machine was used to implement an ad-hoc kthread parking of rcuo kthreads. This code has been removed and therefore the related state can be erased as well. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Paul E. 
McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcu_segcblist.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 5469c54cd778..1ef1bb54853d 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -186,9 +186,8 @@ struct rcu_cblist { */ #define SEGCBLIST_ENABLED BIT(0) #define SEGCBLIST_LOCKING BIT(1) -#define SEGCBLIST_KTHREAD_CB BIT(2) -#define SEGCBLIST_KTHREAD_GP BIT(3) -#define SEGCBLIST_OFFLOADED BIT(4) +#define SEGCBLIST_KTHREAD_GP BIT(2) +#define SEGCBLIST_OFFLOADED BIT(3) struct rcu_segcblist { struct rcu_head *head; -- cgit v1.2.3 From dc1c8034e31b14a2e5e212104ec508aec44ce1b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Jul 2024 20:24:12 -0700 Subject: minmax: simplify min()/max()/clamp() implementation Now that we no longer have any C constant expression contexts (ie array size declarations or static initializers) that use min() or max(), we can simpify the implementation by not having to worry about the result staying as a C constant expression. So now we can unconditionally just use temporary variables of the right type, and get rid of the excessive expansion that used to come from the use of __builtin_choose_expr(__is_constexpr(...), .. to pick the specialized code for constant expressions. Another expansion simplification is to pass the temporary variables (in addition to the original expression) to our __types_ok() macro. That may superficially look like it complicates the macro, but when we only want the type of the expression, expanding the temporary variable names is much simpler and smaller than expanding the potentially complicated original expression. As a result, on my machine, doing a $ time make drivers/staging/media/atomisp/pci/isp/kernels/ynr/ynr_1.0/ia_css_ynr.host.i goes from real 0m16.621s user 0m15.360s sys 0m1.221s to real 0m2.532s user 0m2.091s sys 0m0.452s because the token expansion goes down dramatically. In particular, the longest line expansion (which was line 71 of that 'ia_css_ynr.host.c' file) shrinks from 23,338kB (yes, 23MB for one single line) to "just" 1,444kB (now "only" 1.4MB). And yes, that line is still the line from hell, because it's doing multiple levels of "min()/max()" expansion thanks to some of them being hidden inside the uDIGIT_FITTING() macro. Lorenzo has a nice cleanup patch that makes that driver use inline functions instead of macros for sDIGIT_FITTING() and uDIGIT_FITTING(), which will fix that line once and for all, but the 16-fold reduction in this case does show why we need to simplify these helpers. 
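A user-space analogue of the resulting shape, with illustrative names rather than the kernel's: each argument is evaluated exactly once into an __auto_type temporary and only the temporaries are compared, so no separate constant-expression branch is needed and arguments with side effects stay safe.

#define my_min(x, y) ({			\
	__auto_type ux_ = (x);		\
	__auto_type uy_ = (y);		\
	ux_ < uy_ ? ux_ : uy_;		\
})

/* my_min(i++, limit) increments i exactly once, and the expansion stays
 * small even when the arguments are themselves macro-heavy expressions. */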
Cc: David Laight Cc: Lorenzo Stoakes Signed-off-by: Linus Torvalds --- include/linux/minmax.h | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index fc384714da45..e3e4353df983 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -35,10 +35,10 @@ #define __is_noneg_int(x) \ (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) -#define __types_ok(x, y) \ - (__is_signed(x) == __is_signed(y) || \ - __is_signed((x) + 0) == __is_signed((y) + 0) || \ - __is_noneg_int(x) || __is_noneg_int(y)) +#define __types_ok(x, y, ux, uy) \ + (__is_signed(ux) == __is_signed(uy) || \ + __is_signed((ux) + 0) == __is_signed((uy) + 0) || \ + __is_noneg_int(x) || __is_noneg_int(y)) #define __cmp_op_min < #define __cmp_op_max > @@ -51,34 +51,31 @@ #define __cmp_once(op, type, x, y) \ __cmp_once_unique(op, type, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) -#define __careful_cmp_once(op, x, y) ({ \ - static_assert(__types_ok(x, y), \ +#define __careful_cmp_once(op, x, y, ux, uy) ({ \ + __auto_type ux = (x); __auto_type uy = (y); \ + static_assert(__types_ok(x, y, ux, uy), \ #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ - __cmp_once(op, __auto_type, x, y); }) + __cmp(op, ux, uy); }) -#define __careful_cmp(op, x, y) \ - __builtin_choose_expr(__is_constexpr((x) - (y)), \ - __cmp(op, x, y), __careful_cmp_once(op, x, y)) +#define __careful_cmp(op, x, y) \ + __careful_cmp_once(op, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) -#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ - typeof(val) unique_val = (val); \ - typeof(lo) unique_lo = (lo); \ - typeof(hi) unique_hi = (hi); \ +#define __clamp_once(val, lo, hi, uval, ulo, uhi) ({ \ + __auto_type uval = (val); \ + __auto_type ulo = (lo); \ + __auto_type uhi = (hi); \ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ (lo) <= (hi), true), \ "clamp() low limit " #lo " greater than high limit " #hi); \ - static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ - static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ - __clamp(unique_val, unique_lo, unique_hi); }) - -#define __careful_clamp(val, lo, hi) ({ \ - __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ - __clamp(val, lo, hi), \ - __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ - __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) + static_assert(__types_ok(uval, lo, uval, ulo), "clamp() 'lo' signedness error"); \ + static_assert(__types_ok(uval, hi, uval, uhi), "clamp() 'hi' signedness error"); \ + __clamp(uval, ulo, uhi); }) + +#define __careful_clamp(val, lo, hi) \ + __clamp_once(val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) /** * min - return minimum of two values of the same or compatible types -- cgit v1.2.3 From 59c34008d3bdeef4c8ebc0ed2426109b474334d4 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Mon, 22 Jul 2024 14:58:01 +0530 Subject: x86/amd_nb: Add new PCI IDs for AMD family 1Ah model 60h Add new PCI device IDs into the root IDs and miscellaneous IDs lists to provide support for the latest generation of AMD 1Ah family 60h processor models. 
Signed-off-by: Shyam Sundar S K Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20240722092801.3480266-1-Shyam-sundar.S-k@amd.com --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e388c8b1cbc2..91182aa1d2ec 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -580,6 +580,7 @@ #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb +#define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F3 0x124b #define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F3 0x12bb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 #define PCI_DEVICE_ID_AMD_MI300_DF_F3 0x152b -- cgit v1.2.3 From 8423895d456672736a278d64f86d8b434eef2b54 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 14 May 2024 15:08:58 +0200 Subject: video: Handle HAS_IOPORT dependencies In a future patch HAS_IOPORT=n will disable inb()/outb() and friends at compile time. We thus need to #ifdef functions and their callsites which unconditionally use these I/O accessors. In the include/video/vga.h these are conveniently all those functions with the vga_io_* prefix. Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Niklas Schnelle Signed-off-by: Helge Deller --- include/video/vga.h | 58 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/video/vga.h b/include/video/vga.h index 947c0abd04ef..468764d6727a 100644 --- a/include/video/vga.h +++ b/include/video/vga.h @@ -197,9 +197,26 @@ struct vgastate { extern int save_vga(struct vgastate *state); extern int restore_vga(struct vgastate *state); +static inline unsigned char vga_mm_r (void __iomem *regbase, unsigned short port) +{ + return readb (regbase + port); +} + +static inline void vga_mm_w (void __iomem *regbase, unsigned short port, unsigned char val) +{ + writeb (val, regbase + port); +} + +static inline void vga_mm_w_fast (void __iomem *regbase, unsigned short port, + unsigned char reg, unsigned char val) +{ + writew (VGA_OUT16VAL (val, reg), regbase + port); +} + /* * generic VGA port read/write */ +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_r (unsigned short port) { @@ -217,22 +234,6 @@ static inline void vga_io_w_fast (unsigned short port, unsigned char reg, outw(VGA_OUT16VAL (val, reg), port); } -static inline unsigned char vga_mm_r (void __iomem *regbase, unsigned short port) -{ - return readb (regbase + port); -} - -static inline void vga_mm_w (void __iomem *regbase, unsigned short port, unsigned char val) -{ - writeb (val, regbase + port); -} - -static inline void vga_mm_w_fast (void __iomem *regbase, unsigned short port, - unsigned char reg, unsigned char val) -{ - writew (VGA_OUT16VAL (val, reg), regbase + port); -} - static inline unsigned char vga_r (void __iomem *regbase, unsigned short port) { if (regbase) @@ -258,8 +259,25 @@ static inline void vga_w_fast (void __iomem *regbase, unsigned short port, else vga_io_w_fast (port, reg, val); } +#else /* CONFIG_HAS_IOPORT */ +static inline unsigned char vga_r (void __iomem *regbase, unsigned short port) +{ + return vga_mm_r (regbase, port); +} + +static inline void vga_w (void __iomem *regbase, unsigned short port, unsigned char val) +{ + vga_mm_w (regbase, port, val); +} +static inline void vga_w_fast (void __iomem *regbase, unsigned short port, + unsigned char reg, 
unsigned char val) +{ + vga_mm_w_fast (regbase, port, reg, val); +} +#endif /* CONFIG_HAS_IOPORT */ + /* * VGA CRTC register read/write */ @@ -280,6 +298,7 @@ static inline void vga_wcrt (void __iomem *regbase, unsigned char reg, unsigned #endif /* VGA_OUTW_WRITE */ } +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_rcrt (unsigned char reg) { vga_io_w (VGA_CRT_IC, reg); @@ -295,6 +314,7 @@ static inline void vga_io_wcrt (unsigned char reg, unsigned char val) vga_io_w (VGA_CRT_DC, val); #endif /* VGA_OUTW_WRITE */ } +#endif /* CONFIG_HAS_IOPORT */ static inline unsigned char vga_mm_rcrt (void __iomem *regbase, unsigned char reg) { @@ -333,6 +353,7 @@ static inline void vga_wseq (void __iomem *regbase, unsigned char reg, unsigned #endif /* VGA_OUTW_WRITE */ } +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_rseq (unsigned char reg) { vga_io_w (VGA_SEQ_I, reg); @@ -348,6 +369,7 @@ static inline void vga_io_wseq (unsigned char reg, unsigned char val) vga_io_w (VGA_SEQ_D, val); #endif /* VGA_OUTW_WRITE */ } +#endif /* CONFIG_HAS_IOPORT */ static inline unsigned char vga_mm_rseq (void __iomem *regbase, unsigned char reg) { @@ -385,6 +407,7 @@ static inline void vga_wgfx (void __iomem *regbase, unsigned char reg, unsigned #endif /* VGA_OUTW_WRITE */ } +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_rgfx (unsigned char reg) { vga_io_w (VGA_GFX_I, reg); @@ -400,6 +423,7 @@ static inline void vga_io_wgfx (unsigned char reg, unsigned char val) vga_io_w (VGA_GFX_D, val); #endif /* VGA_OUTW_WRITE */ } +#endif /* CONFIG_HAS_IOPORT */ static inline unsigned char vga_mm_rgfx (void __iomem *regbase, unsigned char reg) { @@ -434,6 +458,7 @@ static inline void vga_wattr (void __iomem *regbase, unsigned char reg, unsigned vga_w (regbase, VGA_ATT_W, val); } +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_rattr (unsigned char reg) { vga_io_w (VGA_ATT_IW, reg); @@ -445,6 +470,7 @@ static inline void vga_io_wattr (unsigned char reg, unsigned char val) vga_io_w (VGA_ATT_IW, reg); vga_io_w (VGA_ATT_W, val); } +#endif /* CONFIG_HAS_IOPORT */ static inline unsigned char vga_mm_rattr (void __iomem *regbase, unsigned char reg) { -- cgit v1.2.3 From ba386777a30b38dabcc7fb8a89ec2869a09915f7 Mon Sep 17 00:00:00 2001 From: Vignesh Balasubramanian Date: Thu, 25 Jul 2024 21:40:18 +0530 Subject: x86/elf: Add a new FPU buffer layout info to x86 core files Add a new .note section containing type, size, offset and flags of every xfeature that is present. This information will be used by debuggers to understand the XSAVE layout of the machine where the core file has been dumped, and to read XSAVE registers, especially during cross-platform debugging. The XSAVE layouts of modern AMD and Intel CPUs differ, especially since Memory Protection Keys and the AVX-512 features have been inculcated into the AMD CPUs. Since AMD never adopted (and hence never left room in the XSAVE layout for) the Intel MPX feature, tools like GDB had assumed a fixed XSAVE layout matching that of Intel (based on the XCR0 mask). Hence, core dumps from AMD CPUs didn't match the known size for the XCR0 mask. This resulted in GDB and other tools not being able to access the values of the AVX-512 and PKRU registers on AMD CPUs. To solve this, an interim solution has been accepted into GDB, and is already a part of GDB 14, see https://sourceware.org/pipermail/gdb-patches/2023-March/198081.html. 
But it depends on heuristics based on the total XSAVE register set size and the XCR0 mask to infer the layouts of the various register blocks for core dumps, and hence, is not a foolproof mechanism to determine the layout of the XSAVE area. Therefore, add a new core dump note in order to allow GDB/LLDB and other relevant tools to determine the layout of the XSAVE area of the machine where the corefile was dumped. The new core dump note (which is being proposed as a per-process .note section), NT_X86_XSAVE_LAYOUT (0x205) contains an array of structures. Each structure describes an individual extended feature containing offset, size and flags in this format: struct x86_xfeat_component { u32 type; u32 size; u32 offset; u32 flags; }; and in an independent manner, allowing for future extensions without depending on hw arch specifics like CPUID etc. [ bp: Massage commit message, zap trailing whitespace. ] Co-developed-by: Jini Susan George Signed-off-by: Jini Susan George Co-developed-by: Borislav Petkov (AMD) Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Vignesh Balasubramanian Link: https://lore.kernel.org/r/20240725161017.112111-2-vigbalas@amd.com --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b54b313bcf07..e30a9b47dc87 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -411,6 +411,7 @@ typedef struct elf64_shdr { #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ /* Old binutils treats 0x203 as a CET state */ #define NT_X86_SHSTK 0x204 /* x86 SHSTK state */ +#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */ #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ #define NT_S390_TIMER 0x301 /* s390 timer register */ #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ -- cgit v1.2.3 From 52c3fb1a0f822fd64529ca64f3792095524de450 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jul 2024 10:21:49 +0200 Subject: perf/x86: Add hw_perf_event::aux_config Start a new section for AUX PMUs in hw_perf_event. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a8942277dda..6bb0c21d6335 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -168,6 +168,9 @@ struct hw_perf_event { struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra branch_reg; }; + struct { /* aux / Intel-PT */ + u64 aux_config; + }; struct { /* software */ struct hrtimer hrtimer; }; -- cgit v1.2.3 From f23c042ce34ba265cf3129d530702b5d218e3f4b Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 27 May 2024 14:06:47 +0200 Subject: sched/deadline: Comment sched_dl_entity::dl_server variable Add an explanation for the newly added variable. 
Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Tested-by: Juri Lelli Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/147f7aa8cb8fd925f36aa8059af6a35aad08b45a.1716811044.git.bristot@kernel.org --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8d150343d42..1c771ea4481d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -639,6 +639,8 @@ struct sched_dl_entity { * * @dl_overrun tells if the task asked to be informed about runtime * overruns. + * + * @dl_server tells if this is a server entity. */ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; -- cgit v1.2.3 From a110a81c52a9de73e2e57e826dd3bf0fd4c22226 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 27 May 2024 14:06:51 +0200 Subject: sched/deadline: Deferrable dl server Among the motivations for the DL servers is the real-time throttling mechanism. This mechanism works by throttling the rt_rq after running for a long period without leaving space for fair tasks. The base dl server avoids this problem by boosting fair tasks instead of throttling the rt_rq. The point is that it boosts without waiting for potential starvation, causing some non-intuitive cases. For example, an IRQ dispatches two tasks on an idle system, a fair one and an RT one. The DL server will be activated, running the fair task before the RT one. This problem can be avoided by deferring the dl server activation. By setting the defer option, the dl_server will dispatch a SCHED_DEADLINE reservation with replenished runtime, but throttled. The dl_timer will be set for the defer time at (period - runtime) ns from the start time, thus boosting the fair rq at the defer time. If the fair scheduler has the opportunity to run while waiting for the defer time, the dl server runtime will be consumed. If the runtime is completely consumed before the defer time, the server will be replenished while still in a throttled state. Then, the dl_timer will be reset to the new defer time. If the fair server reaches the defer time without consuming its runtime, the server will start running, following CBS rules (thus without breaking SCHED_DEADLINE). Then the server will continue in the running state (without deferring) until the fair tasks are able to execute under the regular fair scheduler again (end of the starvation). Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Tested-by: Juri Lelli Link: https://lore.kernel.org/r/dd175943c72533cd9f0b87767c6499204879cc38.1716811044.git.bristot@kernel.org --- include/linux/sched.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c771ea4481d..4edd7e2096fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -641,12 +641,24 @@ struct sched_dl_entity { * overruns. * * @dl_server tells if this is a server entity. + * + * @dl_defer tells if this is a deferred or regular server. For + * now only defer server exists. + * + * @dl_defer_armed tells if the deferrable server is waiting + * for the replenishment timer to activate it. + * + * @dl_defer_running tells if the deferrable server is actually + * running, skipping the defer phase.
*/ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; unsigned int dl_server : 1; + unsigned int dl_defer : 1; + unsigned int dl_defer_armed : 1; + unsigned int dl_defer_running : 1; /* * Bandwidth enforcement timer. Each -deadline task has its -- cgit v1.2.3 From c8a85394cfdb4696b4e2f8a0f3066a1c921af426 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 27 May 2024 14:06:54 +0200 Subject: sched/core: Fix picking of tasks for core scheduling with DL server * Use simple CFS pick_task for DL pick_task DL server's pick_task calls CFS's pick_next_task_fair(), this is wrong because core scheduling's pick_task only calls CFS's pick_task() for evaluation / checking of the CFS task (comparing across CPUs), not for actually affirmatively picking the next task. This causes RB tree corruption issues in CFS that were found by syzbot. * Make pick_task_fair clear DL server A DL task pick might set ->dl_server, but it is possible the task will never run (say the other HT has a stop task). If the CFS task is picked in the future directly (say without DL server), ->dl_server will be set. So clear it in pick_task_fair(). This fixes the KASAN issue reported by syzbot in set_next_entity(). (DL refactoring suggestions by Vineeth Pillai). Reported-by: Suleiman Souhlal Signed-off-by: "Joel Fernandes (Google)" Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vineeth Pillai Tested-by: Juri Lelli Link: https://lore.kernel.org/r/b10489ab1f03d23e08e6097acea47442e7d6466f.1716811044.git.bristot@kernel.org --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4edd7e2096fb..2c1b4ee3234f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -686,7 +686,8 @@ struct sched_dl_entity { */ struct rq *rq; dl_server_has_tasks_f server_has_tasks; - dl_server_pick_f server_pick; + dl_server_pick_f server_pick_next; + dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES /* -- cgit v1.2.3 From 02672e609fa93dd5835783830bf18387916a077a Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 19 Jul 2024 11:39:30 +0200 Subject: dt-bindings: clock: axg-audio: add earcrx clock ids Add clock IDs for the eARC Rx device found on sm1 SoCs Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240719093934.3985139-2-jbrunet@baylibre.com Signed-off-by: Jerome Brunet --- include/dt-bindings/clock/axg-audio-clkc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/axg-audio-clkc.h b/include/dt-bindings/clock/axg-audio-clkc.h index 08c82c22fa5f..607f23b83fa7 100644 --- a/include/dt-bindings/clock/axg-audio-clkc.h +++ b/include/dt-bindings/clock/axg-audio-clkc.h @@ -155,5 +155,12 @@ #define AUD_CLKID_SYSCLK_B_DIV 175 #define AUD_CLKID_SYSCLK_A_EN 176 #define AUD_CLKID_SYSCLK_B_EN 177 +#define AUD_CLKID_EARCRX 178 +#define AUD_CLKID_EARCRX_CMDC_SEL 179 +#define AUD_CLKID_EARCRX_CMDC_DIV 180 +#define AUD_CLKID_EARCRX_CMDC 181 +#define AUD_CLKID_EARCRX_DMAC_SEL 182 +#define AUD_CLKID_EARCRX_DMAC_DIV 183 +#define AUD_CLKID_EARCRX_DMAC 184 #endif /* __AXG_AUDIO_CLKC_BINDINGS_H */ -- cgit v1.2.3 From 8cd44dd1d17a23d5cc8c443c659ca57aa76e2fa5 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 15 Feb 2023 09:18:02 +0900 Subject: btrfs: zoned: fix zone_unusable accounting on making block group read-write again When btrfs makes a block 
group read-only, it adds all free regions in the block group to space_info->bytes_readonly. That free space excludes reserved and pinned regions. OTOH, when btrfs makes the block group read-write again, it moves all the unused regions into the block group's zone_unusable. That unused region includes reserved and pinned regions. As a result, it counts too much zone_unusable bytes. Fortunately (or unfortunately), having erroneous zone_unusable does not affect the calculation of space_info->bytes_readonly, because free space (num_bytes in btrfs_dec_block_group_ro) calculation is done based on the erroneous zone_unusable and it reduces the num_bytes just to cancel the error. This behavior can be easily discovered by adding a WARN_ON to check e.g, "bg->pinned > 0" in btrfs_dec_block_group_ro(), and running fstests test case like btrfs/282. Fix it by properly considering pinned and reserved in btrfs_dec_block_group_ro(). Also, add a WARN_ON and introduce btrfs_space_info_update_bytes_zone_unusable() to catch a similar mistake. Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones") CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Naohiro Aota Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index eeb56975bee7..de55a555d95b 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2383,6 +2383,14 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned, TP_ARGS(fs_info, sinfo, old, diff) ); +DEFINE_EVENT(btrfs__space_info_update, update_bytes_zone_unusable, + + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_space_info *sinfo, u64 old, s64 diff), + + TP_ARGS(fs_info, sinfo, old, diff) +); + DECLARE_EVENT_CLASS(btrfs_raid56_bio, TP_PROTO(const struct btrfs_raid_bio *rbio, -- cgit v1.2.3 From 5297bba507dc54045e6efeb9955c1271ca9aafe1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 23 Jul 2024 15:27:01 +0200 Subject: genirq/msi: Silence 'set affinity failed' warning Various PCI controllers that mux MSIs onto a single IRQ line produce these "IRQ%d: set affinity failed" warnings when entering suspend. This has been discussed before [1] [2] and an example test case is included at the end of this commit message. Controller drivers that create MSI IRQ domain with MSI_FLAG_USE_DEF_CHIP_OPS and do not override the .irq_set_affinity() irqchip callback get assigned the default msi_domain_set_affinity() callback. That is not desired on controllers where it is not possible to set affinity of each MSI IRQ line to a specific CPU core due to hardware limitation. Introduce flag MSI_FLAG_NO_AFFINITY, which keeps .irq_set_affinity() unset if the controller driver did not assign it. This way, migrate_one_irq() can exit right away, without printing the warning. The .irq_set_affinity() implementations which only return -EINVAL can be removed from multiple controller drivers. $ grep 25 /proc/interrupts 25: 0 0 0 0 0 0 0 0 PCIe MSI 0 Edge PCIe PME $ echo core > /sys/power/pm_test ; echo mem > /sys/power/state ... Disabling non-boot CPUs ... IRQ25: set affinity failed(-22). <---------- This is being silenced here psci: CPU7 killed (polled 4 ms) ... 
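For illustration, a controller driver that muxes all MSIs onto a single IRQ line would opt in roughly as below; only the MSI_FLAG_* constants come from the patch and existing msi.h, the driver names are made up:

  static struct irq_chip my_pcie_msi_chip = {
          .name = "my-pcie-msi",
          /* no .irq_set_affinity: vectors cannot be steered individually */
  };

  static struct msi_domain_info my_pcie_msi_domain_info = {
          .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
                   MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX,
          .chip  = &my_pcie_msi_chip,
  };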
[1] https://lore.kernel.org/all/d4a6eea3c5e33a3a4056885419df95a7@kernel.org/ [2] https://lore.kernel.org/all/5f4947b18bf381615a37aa81c2242477@kernel.org/ Link: https://lore.kernel.org/r/20240723132958.41320-2-marek.vasut+renesas@mailbox.org Signed-off-by: Marek Vasut [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Damien Le Moal Reviewed-by: Manivannan Sadhasivam Acked-by: Thomas Gleixner --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 944979763825..b10093c4d00e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -554,6 +554,8 @@ enum { MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19), /* PCI/MSI-X vectors can be dynamically allocated/freed post MSI-X enable */ MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), + /* PCI MSIs cannot be steered separately to CPU cores */ + MSI_FLAG_NO_AFFINITY = (1 << 21), }; /** -- cgit v1.2.3 From 2accfdb7eff65f390c4308b0e9cb7c3fe48ad63c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Jul 2024 10:58:28 -0700 Subject: profiling: attempt to remove per-cpu profile flip buffer This is the really old legacy kernel profiling code, which has long since been obviated by "real profiling" (ie 'prof' and company), and mainly remains as a source of syzbot reports. There are anecdotal reports that people still use it for boot-time profiling, but it's unlikely that such use would care about the old NUMA optimizations in this code from 2004 (commit ad02973d42: "profile: 512x Altix timer interrupt livelock fix" in the BK import archive at [1]) So in order to head off future syzbot reports, let's try to simplify this code and get rid of the per-cpu profile buffers that are quite a large portion of the complexity footprint of this thing (including CPU hotplug callbacks etc). It's unlikely anybody will actually notice, or possibly, as Thomas put it: "Only people who indulge in nostalgia will notice :)". That said, if it turns out that this code is actually actively used by somebody, we can always revert this removal. Thus the "attempt" in the summary line. [ Note: in a small nod to "the profiling code can cause NUMA problems", this also removes the "increment the last entry in the profiling array on any unknown hits" logic. That would account any program counter in a module to that single counter location, and might exacerbate any NUMA cacheline bouncing issues ] Link: https://lore.kernel.org/all/CAHk-=wgs52BxT4Zjmjz8aNvHWKxf5_ThBY4bYL1Y6CTaNL2dTw@mail.gmail.com/ Link: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git [1] Cc: Thomas Gleixner Cc: Tetsuo Handa Signed-off-by: Linus Torvalds --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 51ba681b915a..affdd890899e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,7 +100,6 @@ enum cpuhp_state { CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, - CPUHP_PROFILE_PREPARE, CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, -- cgit v1.2.3 From de79583ffe794663c53b77f97be814522d4edc4f Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 2 Jul 2024 18:02:33 +0200 Subject: iio: core: add accessors 'masklength' 'masklength' is supposed to be an IIO private member. However, drivers (often in trigger handlers) need to access it to iterate over the enabled channels for example (there are other reasons). 
Hence, a couple of new accessors are being added: * iio_for_each_active_channel() - Iterates over the active channels; * iio_get_masklength() - Get length of the channels mask. The goal of these new accessors is to annotate 'masklength' as private as soon as all drivers accessing it are converted to use the new helpers. Signed-off-by: Nuno Sa Reviewed-by: Alexandru Ardelean Link: https://patch.msgid.link/20240702-dev-iio-masklength-private-v1-1-98193bf536a6@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 894309294182..dd6bbc468283 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -855,6 +855,24 @@ static inline const struct iio_scan_type return &chan->scan_type; } +/** + * iio_get_masklength - Get length of the channels mask + * @indio_dev: the IIO device to get the masklength for + */ +static inline unsigned int iio_get_masklength(const struct iio_dev *indio_dev) +{ + return indio_dev->masklength; +} + +/** + * iio_for_each_active_channel - Iterated over active channels + * @indio_dev: the IIO device + * @chan: Holds the index of the enabled channel + */ +#define iio_for_each_active_channel(indio_dev, chan) \ + for_each_set_bit((chan), (indio_dev)->active_scan_mask, \ + iio_get_masklength(indio_dev)) + ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals); int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, -- cgit v1.2.3 From 4bf79f9be434e000c8e12fe83b2f4402480f1460 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Jul 2024 13:23:53 -0700 Subject: bpf: Track equal scalars history on per-instruction level Use bpf_verifier_state->jmp_history to track which registers were updated by find_equal_scalars() (renamed to collect_linked_regs()) when conditional jump was verified. Use recorded information in backtrack_insn() to propagate precision. E.g. for the following program: while verifying instructions 1: r1 = r0 | 2: if r1 < 8 goto ... | push r0,r1 as linked registers in jmp_history 3: if r0 > 16 goto ... | push r0,r1 as linked registers in jmp_history 4: r2 = r10 | 5: r2 += r0 v mark_chain_precision(r0) while doing mark_chain_precision(r0) 5: r2 += r0 | mark r0 precise 4: r2 = r10 | 3: if r0 > 16 goto ... | mark r0,r1 as precise 2: if r1 < 8 goto ... | mark r0,r1 as precise 1: r1 = r0 v Technically, do this as follows: - Use 10 bits to identify each register that gains range because of sync_linked_regs(): - 3 bits for frame number; - 6 bits for register or stack slot number; - 1 bit to indicate if register is spilled. - Use u64 as a vector of 6 such records + 4 bits for vector length. - Augment struct bpf_jmp_history_entry with a field 'linked_regs' representing such vector. - When doing check_cond_jmp_op() remember up to 6 registers that gain range because of sync_linked_regs() in such a vector. - Don't propagate range information and reset IDs for registers that don't fit in 6-value vector. - Push a pair {instruction index, linked registers vector} to bpf_verifier_state->jmp_history. - When doing backtrack_insn() check if any of recorded linked registers is currently marked precise, if so mark all linked registers as precise. 
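As an aside, the 10-bit record packing described above can be sketched in plain, standalone C as follows; the exact field order inside the u64 is an assumption made for illustration, not necessarily the kernel's layout:

  #include <stdint.h>
  #include <stdio.h>

  /* one record: 3 bits frame, 6 bits register/stack slot, 1 bit "spilled";
   * a u64 holds up to six such records plus a 4-bit count
   */
  #define LR_REC_BITS  10
  #define LR_CNT_BITS  4
  #define LR_MAX_RECS  6

  static uint64_t linked_regs_push(uint64_t vec, unsigned int frame,
                                   unsigned int spi_or_reg, int is_spill)
  {
          uint64_t cnt = vec & ((1ull << LR_CNT_BITS) - 1);
          uint64_t rec = (frame & 0x7) | ((spi_or_reg & 0x3f) << 3) |
                         ((is_spill ? 1u : 0u) << 9);

          if (cnt >= LR_MAX_RECS)
                  return vec;     /* vector full: registers that don't fit are dropped */

          vec |= rec << (LR_CNT_BITS + cnt * LR_REC_BITS);
          return (vec & ~((1ull << LR_CNT_BITS) - 1)) | (cnt + 1);
  }

  int main(void)
  {
          uint64_t linked_regs = 0;

          /* r0 and r1 of frame 0 gained range from the same conditional jump */
          linked_regs = linked_regs_push(linked_regs, 0, 0, 0);
          linked_regs = linked_regs_push(linked_regs, 0, 1, 0);
          printf("linked_regs = %#llx\n", (unsigned long long)linked_regs);
          return 0;
  }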
This also requires fixes for two test_verifier tests: - precise: test 1 - precise: test 2 Both tests contain the following instruction sequence: 19: (bf) r2 = r9 ; R2=scalar(id=3) R9=scalar(id=3) 20: (a5) if r2 < 0x8 goto pc+1 ; R2=scalar(id=3,umin=8) 21: (95) exit 22: (07) r2 += 1 ; R2_w=scalar(id=3+1,...) 23: (bf) r1 = r10 ; R1_w=fp0 R10=fp0 24: (07) r1 += -8 ; R1_w=fp-8 25: (b7) r3 = 0 ; R3_w=0 26: (85) call bpf_probe_read_kernel#113 The call to bpf_probe_read_kernel() at (26) forces r2 to be precise. Previously, this forced all registers with the same id to become precise immediately when mark_chain_precision() is called. After this change, the precision is propagated to registers sharing the same id only when the 'if' instruction is backtracked. Hence the verification log for both tests is changed: regs=r2,r9 -> regs=r2 for instructions 25..20. Fixes: 904e6ddf4133 ("bpf: Use scalar ids in mark_chain_precision()") Reported-by: Hao Sun Suggested-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240718202357.1746514-2-eddyz87@gmail.com Closes: https://lore.kernel.org/bpf/CAEf4BzZ0xidVCqB47XnkXcNhkPWF6_nTV7yt+_Lf0kcFEut2Mg@mail.gmail.com/ --- include/linux/bpf_verifier.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6503c85b10a3..731a0a4ac13c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -371,6 +371,10 @@ struct bpf_jmp_history_entry { u32 prev_idx : 22; /* special flags, e.g., whether insn is doing register stack spill/load */ u32 flags : 10; + /* additional registers that need precision tracking when this + * jump is backtracked, vector of six 10-bit records + */ + u64 linked_regs; }; /* Maximum number of register states that can exist at once */ -- cgit v1.2.3 From e42ac14180554fa23a3312d4f921dc4ea7972fb7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 22 Jul 2024 11:30:45 -0700 Subject: bpf: Check unsupported ops from the bpf_struct_ops's cfi_stubs The bpf_tcp_ca struct_ops currently uses a "u32 unsupported_ops[]" array to track which ops are not supported. After cfi_stubs had been added, the function pointer in cfi_stubs is also NULL for the unsupported ops. Thus, the "u32 unsupported_ops[]" becomes redundant. This observation was originally brought up in the bpf/cfi discussion: https://lore.kernel.org/bpf/CAADnVQJoEkdjyCEJRPASjBw1QGsKYrF33QdMGc1RZa9b88bAEA@mail.gmail.com/ The recent bpf qdisc patch (https://lore.kernel.org/bpf/20240714175130.4051012-6-amery.hung@bytedance.com/) also needs to specify quite a few unsupported ops. It is a good time to clean it up. This patch removes the need for "u32 unsupported_ops[]" and tests for null-ness in the cfi_stubs instead. Testing the cfi_stubs is done in a new function bpf_struct_ops_supported(). The verifier will call bpf_struct_ops_supported() when loading the struct_ops program. The ".check_member" is removed from the bpf_tcp_ca in this patch. ".check_member" could still be useful for other subsystems to enforce other restrictions (e.g. sched_ext checks for prog->sleepable). To keep the same error return, ENOTSUPP is used.
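Conceptually, the check is little more than a NULL test on the stub at the member's offset; a minimal sketch of the idea (not necessarily the exact kernel code, hence the illustrative name) looks like:

  static int struct_ops_op_supported(const struct bpf_struct_ops *st_ops, u32 moff)
  {
          void *stub = *(void **)(st_ops->cfi_stubs + moff);

          return stub ? 0 : -ENOTSUPP;
  }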
Cc: Amery Hung Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240722183049.2254692-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3b94ec161e8c..4c54864316ee 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1795,6 +1795,7 @@ struct bpf_struct_ops_common_value { #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); +int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, @@ -1851,6 +1852,10 @@ static inline void bpf_module_put(const void *data, struct module *owner) { module_put(owner); } +static inline int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff) +{ + return -ENOTSUPP; +} static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value) -- cgit v1.2.3 From 52dea0a15cc888e89f0144dca7b712fb56d12a28 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:28 +0200 Subject: posix-timers: Convert timer list to hlist No requirement for a real list. Spare a few bytes. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- include/linux/posix-timers.h | 2 +- include/linux/sched/signal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index dc7b738de299..453691710839 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -158,7 +158,7 @@ static inline void posix_cputimers_init_work(void) { } * @rcu: RCU head for freeing the timer. */ struct k_itimer { - struct list_head list; + struct hlist_node list; struct hlist_node t_hash; spinlock_t it_lock; const struct k_clock *kclock; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0a0e23c45406..622fdef78e14 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -137,7 +137,7 @@ struct signal_struct { /* POSIX.1b Interval Timers */ unsigned int next_posix_timer_id; - struct list_head posix_timers; + struct hlist_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; -- cgit v1.2.3 From a2b80ce87a87fc18c594e74d13031d5e347b69cb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:33 +0200 Subject: signal: Remove task argument from dequeue_signal() The task pointer which is handed to dequeue_signal() is always current. The argument along with the first comment about signalfd in that function is confusing at best. Remove it and use current internally. Update the stale comment for dequeue_signal() while at it. 
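A hypothetical caller only drops the task argument, as the kernel_dequeue_signal() hunk below also shows:

  kernel_siginfo_t info;
  enum pid_type type;
  int signr;

  spin_lock_irq(&current->sighand->siglock);
  /* before: signr = dequeue_signal(current, &current->blocked, &info, &type); */
  signr = dequeue_signal(&current->blocked, &info, &type);
  spin_unlock_irq(&current->sighand->siglock);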
Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) --- include/linux/sched/signal.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 622fdef78e14..c8ed09ac29ac 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -276,8 +276,7 @@ static inline void signal_set_stop_flags(struct signal_struct *sig, extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *task, sigset_t *mask, - kernel_siginfo_t *info, enum pid_type *type); +extern int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); static inline int kernel_dequeue_signal(void) { @@ -287,7 +286,7 @@ static inline int kernel_dequeue_signal(void) int ret; spin_lock_irq(&task->sighand->siglock); - ret = dequeue_signal(task, &task->blocked, &__info, &__type); + ret = dequeue_signal(&task->blocked, &__info, &__type); spin_unlock_irq(&task->sighand->siglock); return ret; -- cgit v1.2.3 From 5d99e198be279045e6ecefe220f5c52f8ce9bfd5 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 19 Jul 2024 19:00:52 +0800 Subject: bpf, lsm: Add check for BPF LSM return value A bpf prog returning a positive number attached to file_alloc_security hook makes kernel panic. This happens because file system can not filter out the positive number returned by the LSM prog using IS_ERR, and misinterprets this positive number as a file pointer. Given that hook file_alloc_security never returned positive number before the introduction of BPF LSM, and other BPF LSM hooks may encounter similar issues, this patch adds LSM return value check in verifier, to ensure no unexpected value is returned. Fixes: 520b7aa00d8c ("bpf: lsm: Initialize the BPF LSM hooks") Reported-by: Xin Liu Signed-off-by: Xu Kuohai Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240719110059.797546-3-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 1 + include/linux/bpf_lsm.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4c54864316ee..5739cc9986f8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -927,6 +927,7 @@ struct bpf_insn_access_aux { }; }; struct bpf_verifier_log *log; /* for verbose logs */ + bool is_retval; /* is accessing function return value ? 
*/ }; static inline void diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 1de7ece5d36d..aefcd6564251 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -9,6 +9,7 @@ #include #include +#include #include #ifdef CONFIG_BPF_LSM @@ -45,6 +46,8 @@ void bpf_inode_storage_free(struct inode *inode); void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); +int bpf_lsm_get_retval_range(const struct bpf_prog *prog, + struct bpf_retval_range *range); #else /* !CONFIG_BPF_LSM */ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) @@ -78,6 +81,11 @@ static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, { } +static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog, + struct bpf_retval_range *range) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ -- cgit v1.2.3 From 28ead3eaabc16ecc907cfb71876da028080f6356 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 19 Jul 2024 19:00:53 +0800 Subject: bpf: Prevent tail call between progs attached to different hooks bpf progs can be attached to kernel functions, and the attached functions can take different parameters or return different return values. If prog attached to one kernel function tail calls prog attached to another kernel function, the ctx access or return value verification could be bypassed. For example, if prog1 is attached to func1 which takes only 1 parameter and prog2 is attached to func2 which takes two parameters. Since verifier assumes the bpf ctx passed to prog2 is constructed based on func2's prototype, verifier allows prog2 to access the second parameter from the bpf ctx passed to it. The problem is that verifier does not prevent prog1 from passing its bpf ctx to prog2 via tail call. In this case, the bpf ctx passed to prog2 is constructed from func1 instead of func2, that is, the assumption for ctx access verification is bypassed. Another example, if BPF LSM prog1 is attached to hook file_alloc_security, and BPF LSM prog2 is attached to hook bpf_lsm_audit_rule_known. Verifier knows the return value rules for these two hooks, e.g. it is legal for bpf_lsm_audit_rule_known to return positive number 1, and it is illegal for file_alloc_security to return positive number. So verifier allows prog2 to return positive number 1, but does not allow prog1 to return positive number. The problem is that verifier does not prevent prog1 from calling prog2 via tail call. In this case, prog2's return value 1 will be used as the return value for prog1's hook file_alloc_security. That is, the return value rule is bypassed. This patch adds restriction for tail call to prevent such bypasses. Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20240719110059.797546-4-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5739cc9986f8..f16d0753f518 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -294,6 +294,7 @@ struct bpf_map { * same prog type, JITed flag and xdp_has_frags flag. 
*/ struct { + const struct btf_type *attach_func_proto; spinlock_t lock; enum bpf_prog_type type; bool jited; -- cgit v1.2.3 From 2aff9d20d50ac45dd13a013ef5231f4fb8912356 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:25 -0700 Subject: lsm: infrastructure management of the sock security Move management of the sock->sk_security blob out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. Acked-by: Paul Moore Reviewed-by: Kees Cook Reviewed-by: John Johansen Acked-by: Stephen Smalley Signed-off-by: Casey Schaufler [PM: subject tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index a2ade0ffe9e7..efd4a0655159 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -73,6 +73,7 @@ struct lsm_blob_sizes { int lbs_cred; int lbs_file; int lbs_inode; + int lbs_sock; int lbs_superblock; int lbs_ipc; int lbs_msg_msg; -- cgit v1.2.3 From 5f8d28f6d7d568dbbc8c5bce94894474c07afd4f Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:26 -0700 Subject: lsm: infrastructure management of the key security blob Move management of the key->security blob out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. There are no existing modules that require a key_free hook, so the call to it and the definition for it have been removed. Signed-off-by: Casey Schaufler Reviewed-by: John Johansen [PM: subject tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 - include/linux/lsm_hooks.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 855db460e08b..8cc60644f3bd 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -403,7 +403,6 @@ LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, #ifdef CONFIG_KEYS LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, unsigned long flags) -LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index efd4a0655159..7233bc0737be 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -76,6 +76,7 @@ struct lsm_blob_sizes { int lbs_sock; int lbs_superblock; int lbs_ipc; + int lbs_key; int lbs_msg_msg; int lbs_task; int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ -- cgit v1.2.3 From a39c0f77dbbe083f3eec6c3b32d90f168f7575eb Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:28 -0700 Subject: lsm: infrastructure management of the dev_tun blob Move management of the dev_tun security blob out of the individual security modules and into the LSM infrastructure. The security modules tell the infrastructure how much space they require at initialization. There are no longer any modules that require the dev_tun_free hook. The hook definition has been removed. 
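As a sketch of the new model (the "example" names and the sid field are illustrative, not an existing module): the LSM only declares the blob size it needs, and the alloc hook now receives memory the infrastructure has already allocated:

  struct example_tun_security {
          u32 sid;
  };

  struct lsm_blob_sizes example_blob_sizes __ro_after_init = {
          .lbs_tun_dev = sizeof(struct example_tun_security),
  };

  static int example_tun_dev_alloc_security(void *security)
  {
          struct example_tun_security *tsec = security;

          tsec->sid = 0;  /* just initialize; no kmalloc()/kfree() pair needed anymore */
          return 0;
  }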
Signed-off-by: Casey Schaufler Reviewed-by: John Johansen [PM: subject tweak, selinux style fixes] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 3 +-- include/linux/lsm_hooks.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 8cc60644f3bd..22c475f530ae 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -353,8 +353,7 @@ LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, struct flowi_common *flic) -LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) -LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) +LSM_HOOK(int, 0, tun_dev_alloc_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7233bc0737be..0ff14ff128c8 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -80,6 +80,7 @@ struct lsm_blob_sizes { int lbs_msg_msg; int lbs_task; int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ + int lbs_tun_dev; }; /** -- cgit v1.2.3 From 66de33a0bbb59ef3909d2c65dbbb7fc503d573bd Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:29 -0700 Subject: lsm: infrastructure management of the infiniband blob Move management of the infiniband security blob out of the individual security modules and into the LSM infrastructure. The security modules tell the infrastructure how much space they require at initialization. There are no longer any modules that require the ib_free() hook. The hook definition has been removed. Signed-off-by: Casey Schaufler Reviewed-by: John Johansen [PM: subject tweak, selinux style fixes] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 3 +-- include/linux/lsm_hooks.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 22c475f530ae..4fcbc5b3f58c 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -373,8 +373,7 @@ LSM_HOOK(int, 0, mptcp_add_subflow, struct sock *sk, struct sock *ssk) LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey) LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name, u8 port_num) -LSM_HOOK(int, 0, ib_alloc_security, void **sec) -LSM_HOOK(void, LSM_RET_VOID, ib_free_security, void *sec) +LSM_HOOK(int, 0, ib_alloc_security, void *sec) #endif /* CONFIG_SECURITY_INFINIBAND */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0ff14ff128c8..b6fc6ac88723 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -72,6 +72,7 @@ struct security_hook_list { struct lsm_blob_sizes { int lbs_cred; int lbs_file; + int lbs_ib; int lbs_inode; int lbs_sock; int lbs_superblock; -- cgit v1.2.3 From 61a1dcdceb44d79e5ab511295791b88ea178c045 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:30 -0700 Subject: lsm: infrastructure management of the perf_event security blob Move management of the perf_event->security blob out of the individual security modules and into the security infrastructure. 
Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. There are no longer any modules that require the perf_event_free() hook. The hook definition has been removed. Signed-off-by: Casey Schaufler Reviewed-by: John Johansen [PM: subject tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 - include/linux/lsm_hooks.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 4fcbc5b3f58c..12c81be78eb9 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -439,7 +439,6 @@ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) #ifdef CONFIG_PERF_EVENTS LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) -LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #endif /* CONFIG_PERF_EVENTS */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index b6fc6ac88723..f1ca8082075a 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -79,6 +79,7 @@ struct lsm_blob_sizes { int lbs_ipc; int lbs_key; int lbs_msg_msg; + int lbs_perf_event; int lbs_task; int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ int lbs_tun_dev; -- cgit v1.2.3 From 92de36080c93296ef9005690705cba260b9bd68a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jul 2024 08:34:39 -0700 Subject: bpf: Fail verification for sign-extension of packet data/data_end/data_meta syzbot reported a kernel crash due to commit 1f1e864b6555 ("bpf: Handle sign-extenstin ctx member accesses"). The reason is due to sign-extension of 32-bit load for packet data/data_end/data_meta uapi field. The original code looks like: r2 = *(s32 *)(r1 + 76) /* load __sk_buff->data */ r3 = *(u32 *)(r1 + 80) /* load __sk_buff->data_end */ r0 = r2 r0 += 8 if r3 > r0 goto +1 ... Note that __sk_buff->data load has 32-bit sign extension. After verification and convert_ctx_accesses(), the final asm code looks like: r2 = *(u64 *)(r1 +208) r2 = (s32)r2 r3 = *(u64 *)(r1 +80) r0 = r2 r0 += 8 if r3 > r0 goto pc+1 ... Note that 'r2 = (s32)r2' may make the kernel __sk_buff->data address invalid which may cause runtime failure. Currently, in C code, typically we have void *data = (void *)(long)skb->data; void *data_end = (void *)(long)skb->data_end; ... and it will generate r2 = *(u64 *)(r1 +208) r3 = *(u64 *)(r1 +80) r0 = r2 r0 += 8 if r3 > r0 goto pc+1 If we allow sign-extension, void *data = (void *)(long)(int)skb->data; void *data_end = (void *)(long)skb->data_end; ... the generated code looks like r2 = *(u64 *)(r1 +208) r2 <<= 32 r2 s>>= 32 r3 = *(u64 *)(r1 +80) r0 = r2 r0 += 8 if r3 > r0 goto pc+1 and this will cause verification failure since "r2 <<= 32" is not allowed as "r2" is a packet pointer. To fix this issue for case r2 = *(s32 *)(r1 + 76) /* load __sk_buff->data */ this patch added additional checking in is_valid_access() callback function for packet data/data_end/data_meta access. If those accesses are with sign-extenstion, the verification will fail. 
[1] https://lore.kernel.org/bpf/000000000000c90eee061d236d37@google.com/ Reported-by: syzbot+ad9ec60c8eaf69e6f99c@syzkaller.appspotmail.com Fixes: 1f1e864b6555 ("bpf: Handle sign-extenstin ctx member accesses") Acked-by: Eduard Zingerman Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240723153439.2429035-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f16d0753f518..f560ea0c2b36 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -920,6 +920,7 @@ static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); */ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; + bool is_ldsx; union { int ctx_field_size; struct { -- cgit v1.2.3 From 5b5f51bff1b66cedb62b5ba74a1878341204e057 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 22 Jul 2024 16:38:36 -0700 Subject: bpf: no_caller_saved_registers attribute for helper calls GCC and LLVM define a no_caller_saved_registers function attribute. This attribute means that function scratches only some of the caller saved registers defined by ABI. For BPF the set of such registers could be defined as follows: - R0 is scratched only if function is non-void; - R1-R5 are scratched only if corresponding parameter type is defined in the function prototype. This commit introduces flag bpf_func_prot->allow_nocsr. If this flag is set for some helper function, verifier assumes that it follows no_caller_saved_registers calling convention. The contract between kernel and clang allows to simultaneously use such functions and maintain backwards compatibility with old kernels that don't understand no_caller_saved_registers calls (nocsr for short): - clang generates a simple pattern for nocsr calls, e.g.: r1 = 1; r2 = 2; *(u64 *)(r10 - 8) = r1; *(u64 *)(r10 - 16) = r2; call %[to_be_inlined] r2 = *(u64 *)(r10 - 16); r1 = *(u64 *)(r10 - 8); r0 = r1; r0 += r2; exit; - kernel removes unnecessary spills and fills, if called function is inlined by verifier or current JIT (with assumption that patch inserted by verifier or JIT honors nocsr contract, e.g. does not scratch r3-r5 for the example above), e.g. the code above would be transformed to: r1 = 1; r2 = 2; call %[to_be_inlined] r0 = r1; r0 += r2; exit; Technically, the transformation is split into the following phases: - function mark_nocsr_patterns(), called from bpf_check() searches and marks potential patterns in instruction auxiliary data; - upon stack read or write access, function check_nocsr_stack_contract() is used to verify if stack offsets, presumably reserved for nocsr patterns, are used only from those patterns; - function remove_nocsr_spills_fills(), called from bpf_check(), applies the rewrite for valid patterns. See comment in mark_nocsr_pattern_for_call() for more details. 
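For a helper, opting into the contract is a single flag in its bpf_func_proto; in this sketch "bpf_example" is a made-up helper used only to show where allow_nocsr sits:

  static const struct bpf_func_proto bpf_example_proto = {
          .func         = bpf_example,
          .gpl_only     = false,
          .ret_type     = RET_INTEGER,
          .arg1_type    = ARG_ANYTHING,
          .allow_nocsr  = true,  /* scratches only r0 and r1, per the contract above */
  };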
Suggested-by: Alexei Starovoitov Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240722233844.1406874-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 6 ++++++ include/linux/bpf_verifier.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f560ea0c2b36..b9425e410bcb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -808,6 +808,12 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; bool might_sleep; + /* set to true if helper follows contract for gcc/llvm + * attribute no_caller_saved_registers: + * - void functions do not scratch r0 + * - functions taking N arguments scratch only registers r1-rN + */ + bool allow_nocsr; enum bpf_return_type ret_type; union { struct { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 731a0a4ac13c..5cea15c81b8a 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -576,6 +576,14 @@ struct bpf_insn_aux_data { bool is_iter_next; /* bpf_iter__next() kfunc call */ bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ + /* true if STX or LDX instruction is a part of a spill/fill + * pattern for a no_caller_saved_registers call. + */ + u8 nocsr_pattern:1; + /* for CALL instructions, a number of spill/fill pairs in the + * no_caller_saved_registers pattern. + */ + u8 nocsr_spills_num:3; /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ @@ -645,6 +653,10 @@ struct bpf_subprog_info { u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ u16 stack_extra; + /* offsets in range [stack_depth .. nocsr_stack_off) + * are used for no_caller_saved_registers spills and fills. + */ + s16 nocsr_stack_off; bool has_tail_call: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; @@ -652,6 +664,8 @@ struct bpf_subprog_info { bool is_async_cb: 1; bool is_exception_cb: 1; bool args_cached: 1; + /* true if nocsr stack region is used by functions that can't be inlined */ + bool keep_nocsr_stack: 1; u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; -- cgit v1.2.3 From 7ebd8c5acad5f8ca41f37b36dc62570e1fa13d8b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 18 Jul 2024 16:59:06 +0900 Subject: ata: libata: Use QUIRK instead of HORKAGE According to Wiktionary, the verb "hork" is computing slang defined as "To foul up; to be occupied with difficulty, tangle, or unpleasantness; to be broken" (https://en.wiktionary.org/wiki/hork#Verb). libata uses this with the term "horkage" to refer to broken device features. Given that this term is not widely used and its meaning unknown to many, rename it to the more commonly used term "quirk", similar to many other places in the kernel. The renaming done is: 1) Rename all ATA_HORKAGE_XXX flags to ATA_QUIRK_XXX 2) Rename struct ata_device horkage field to quirks 3) Rename struct ata_blacklist_entry to struct ata_dev_quirks_entry. The array of these structures defining quirks for known devices is renamed __ata_dev_quirks. 4) The functions ata_dev_blacklisted() and ata_force_horkage() are renamed to ata_dev_quirks() and ata_force_quirks() respectively. 
5) All the force_horkage_xxx() macros are renamed to force_quirk_xxx() And while at it, make sure that the type "unsigned int" is used consistantly for quirk flags variables and data structure fields. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Igor Pylypiv --- include/linux/libata.h | 71 +++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 17394098bee9..05dd7038ab30 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -362,40 +362,41 @@ enum { */ ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8, - /* Horkage types. May be set by libata or controller on drives - (some horkage may be drive/controller pair dependent */ - - ATA_HORKAGE_DIAGNOSTIC = (1 << 0), /* Failed boot diag */ - ATA_HORKAGE_NODMA = (1 << 1), /* DMA problems */ - ATA_HORKAGE_NONCQ = (1 << 2), /* Don't use NCQ */ - ATA_HORKAGE_MAX_SEC_128 = (1 << 3), /* Limit max sects to 128 */ - ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ - ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */ - ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */ - ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ - ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ - ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ - ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands - not multiple of 16 bytes */ - ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firmware update warning */ - ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ - ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ - ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ - ATA_HORKAGE_DUMP_ID = (1 << 16), /* dump IDENTIFY data */ - ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ - ATA_HORKAGE_ATAPI_DMADIR = (1 << 18), /* device requires dmadir */ - ATA_HORKAGE_NO_NCQ_TRIM = (1 << 19), /* don't use queued TRIM */ - ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ - ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ - ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ - ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ - ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ - ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ - ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ - ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ - ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ - ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ - ATA_HORKAGE_NO_FUA = (1 << 30), /* Do not use FUA */ + /* + * Quirk flags: may be set by libata or controller drivers on drives. + * Some quirks may be drive/controller pair dependent. 
+ */ + ATA_QUIRK_DIAGNOSTIC = (1 << 0), /* Failed boot diag */ + ATA_QUIRK_NODMA = (1 << 1), /* DMA problems */ + ATA_QUIRK_NONCQ = (1 << 2), /* Do not use NCQ */ + ATA_QUIRK_MAX_SEC_128 = (1 << 3), /* Limit max sects to 128 */ + ATA_QUIRK_BROKEN_HPA = (1 << 4), /* Broken HPA */ + ATA_QUIRK_DISABLE = (1 << 5), /* Disable it */ + ATA_QUIRK_HPA_SIZE = (1 << 6), /* Native size off by one */ + ATA_QUIRK_IVB = (1 << 8), /* CBL det validity bit bugs */ + ATA_QUIRK_STUCK_ERR = (1 << 9), /* Stuck ERR on next PACKET */ + ATA_QUIRK_BRIDGE_OK = (1 << 10), /* No bridge limits */ + ATA_QUIRK_ATAPI_MOD16_DMA = (1 << 11), /* Use ATAPI DMA for commands */ + /* not multiple of 16 bytes */ + ATA_QUIRK_FIRMWARE_WARN = (1 << 12), /* Firmware update warning */ + ATA_QUIRK_1_5_GBPS = (1 << 13), /* Force 1.5 Gbps */ + ATA_QUIRK_NOSETXFER = (1 << 14), /* Skip SETXFER, SATA only */ + ATA_QUIRK_BROKEN_FPDMA_AA = (1 << 15), /* Skip AA */ + ATA_QUIRK_DUMP_ID = (1 << 16), /* Dump IDENTIFY data */ + ATA_QUIRK_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ + ATA_QUIRK_ATAPI_DMADIR = (1 << 18), /* Device requires dmadir */ + ATA_QUIRK_NO_NCQ_TRIM = (1 << 19), /* Do not use queued TRIM */ + ATA_QUIRK_NOLPM = (1 << 20), /* Do not use LPM */ + ATA_QUIRK_WD_BROKEN_LPM = (1 << 21), /* Some WDs have broken LPM */ + ATA_QUIRK_ZERO_AFTER_TRIM = (1 << 22), /* Guarantees zero after trim */ + ATA_QUIRK_NO_DMA_LOG = (1 << 23), /* Do not use DMA for log read */ + ATA_QUIRK_NOTRIM = (1 << 24), /* Do not use TRIM */ + ATA_QUIRK_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ + ATA_QUIRK_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ + ATA_QUIRK_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ + ATA_QUIRK_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ + ATA_QUIRK_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ + ATA_QUIRK_NO_FUA = (1 << 30), /* Do not use FUA */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ @@ -663,7 +664,7 @@ struct ata_cpr_log { struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ - unsigned int horkage; /* List of broken features */ + unsigned int quirks; /* List of broken features */ unsigned long flags; /* ATA_DFLAG_xxx */ struct scsi_device *sdev; /* attached SCSI device */ void *private_data; -- cgit v1.2.3 From 58157d607aecb4e05ab793408038b014c84e466f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 18 Jul 2024 16:54:03 +0900 Subject: ata: libata: Print quirks applied to devices Introduce the function ata_dev_print_quirks() to print the quirk flags that will be applied to a scanned device. This new function is called from ata_dev_quirks() when a match on a device model or device model and revision is found for a device in the __ata_dev_quirks array. To implement this function, the ATA_QUIRK_ flags are redefined using the new enum ata_quirk which defines the bit shift for each quirk flag. The array of strings ata_quirk_names is used to define the name of each flag, which are printed by ata_dev_print_quirks(). Example output for a device listed in the __ata_dev_quirks array and which has the ATA_QUIRK_DISABLE flag applied: [10193.461270] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300) [10193.469190] ata1.00: Model 'ASMT109x- Config', rev '2143 5', applying quirks: disable [10193.469195] ata1.00: unsupported device, disabling [10193.481564] ata1.00: disable device enum ata_quirk also defines the __ATA_QUIRK_MAX value as one plus the last quirk flag defined. 
This value is used in ata_dev_quirks() to add a build time check that all quirk flags fit within the unsigned int (32-bits) quirks field of struct ata_device. Signed-off-by: Damien Le Moal Reviewed-by: Igor Pylypiv Reviewed-by: Niklas Cassel --- include/linux/libata.h | 106 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 05dd7038ab30..d598ef690e50 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -55,6 +55,46 @@ /* defines only for the constants which don't work well as enums */ #define ATA_TAG_POISON 0xfafbfcfdU +/* + * Quirk flags bits. + * ata_device->quirks is an unsigned int, so __ATA_QUIRK_MAX must not exceed 32. + */ +enum ata_quirks { + __ATA_QUIRK_DIAGNOSTIC, /* Failed boot diag */ + __ATA_QUIRK_NODMA, /* DMA problems */ + __ATA_QUIRK_NONCQ, /* Don't use NCQ */ + __ATA_QUIRK_MAX_SEC_128, /* Limit max sects to 128 */ + __ATA_QUIRK_BROKEN_HPA, /* Broken HPA */ + __ATA_QUIRK_DISABLE, /* Disable it */ + __ATA_QUIRK_HPA_SIZE, /* Native size off by one */ + __ATA_QUIRK_IVB, /* cbl det validity bit bugs */ + __ATA_QUIRK_STUCK_ERR, /* Stuck ERR on next PACKET */ + __ATA_QUIRK_BRIDGE_OK, /* No bridge limits */ + __ATA_QUIRK_ATAPI_MOD16_DMA, /* Use ATAPI DMA for commands that */ + /* are not a multiple of 16 bytes */ + __ATA_QUIRK_FIRMWARE_WARN, /* Firmware update warning */ + __ATA_QUIRK_1_5_GBPS, /* Force 1.5 Gbps */ + __ATA_QUIRK_NOSETXFER, /* Skip SETXFER, SATA only */ + __ATA_QUIRK_BROKEN_FPDMA_AA, /* Skip AA */ + __ATA_QUIRK_DUMP_ID, /* Dump IDENTIFY data */ + __ATA_QUIRK_MAX_SEC_LBA48, /* Set max sects to 65535 */ + __ATA_QUIRK_ATAPI_DMADIR, /* Device requires dmadir */ + __ATA_QUIRK_NO_NCQ_TRIM, /* Do not use queued TRIM */ + __ATA_QUIRK_NOLPM, /* Do not use LPM */ + __ATA_QUIRK_WD_BROKEN_LPM, /* Some WDs have broken LPM */ + __ATA_QUIRK_ZERO_AFTER_TRIM, /* Guarantees zero after trim */ + __ATA_QUIRK_NO_DMA_LOG, /* Do not use DMA for log read */ + __ATA_QUIRK_NOTRIM, /* Do not use TRIM */ + __ATA_QUIRK_MAX_SEC_1024, /* Limit max sects to 1024 */ + __ATA_QUIRK_MAX_TRIM_128M, /* Limit max trim size to 128M */ + __ATA_QUIRK_NO_NCQ_ON_ATI, /* Disable NCQ on ATI chipset */ + __ATA_QUIRK_NO_ID_DEV_LOG, /* Identify device log missing */ + __ATA_QUIRK_NO_LOG_DIR, /* Do not read log directory */ + __ATA_QUIRK_NO_FUA, /* Do not use FUA */ + + __ATA_QUIRK_MAX, +}; + enum { /* various global constants */ LIBATA_MAX_PRD = ATA_MAX_PRD / 2, @@ -366,40 +406,38 @@ enum { * Quirk flags: may be set by libata or controller drivers on drives. * Some quirks may be drive/controller pair dependent. 
*/ - ATA_QUIRK_DIAGNOSTIC = (1 << 0), /* Failed boot diag */ - ATA_QUIRK_NODMA = (1 << 1), /* DMA problems */ - ATA_QUIRK_NONCQ = (1 << 2), /* Do not use NCQ */ - ATA_QUIRK_MAX_SEC_128 = (1 << 3), /* Limit max sects to 128 */ - ATA_QUIRK_BROKEN_HPA = (1 << 4), /* Broken HPA */ - ATA_QUIRK_DISABLE = (1 << 5), /* Disable it */ - ATA_QUIRK_HPA_SIZE = (1 << 6), /* Native size off by one */ - ATA_QUIRK_IVB = (1 << 8), /* CBL det validity bit bugs */ - ATA_QUIRK_STUCK_ERR = (1 << 9), /* Stuck ERR on next PACKET */ - ATA_QUIRK_BRIDGE_OK = (1 << 10), /* No bridge limits */ - ATA_QUIRK_ATAPI_MOD16_DMA = (1 << 11), /* Use ATAPI DMA for commands */ - /* not multiple of 16 bytes */ - ATA_QUIRK_FIRMWARE_WARN = (1 << 12), /* Firmware update warning */ - ATA_QUIRK_1_5_GBPS = (1 << 13), /* Force 1.5 Gbps */ - ATA_QUIRK_NOSETXFER = (1 << 14), /* Skip SETXFER, SATA only */ - ATA_QUIRK_BROKEN_FPDMA_AA = (1 << 15), /* Skip AA */ - ATA_QUIRK_DUMP_ID = (1 << 16), /* Dump IDENTIFY data */ - ATA_QUIRK_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ - ATA_QUIRK_ATAPI_DMADIR = (1 << 18), /* Device requires dmadir */ - ATA_QUIRK_NO_NCQ_TRIM = (1 << 19), /* Do not use queued TRIM */ - ATA_QUIRK_NOLPM = (1 << 20), /* Do not use LPM */ - ATA_QUIRK_WD_BROKEN_LPM = (1 << 21), /* Some WDs have broken LPM */ - ATA_QUIRK_ZERO_AFTER_TRIM = (1 << 22), /* Guarantees zero after trim */ - ATA_QUIRK_NO_DMA_LOG = (1 << 23), /* Do not use DMA for log read */ - ATA_QUIRK_NOTRIM = (1 << 24), /* Do not use TRIM */ - ATA_QUIRK_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ - ATA_QUIRK_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ - ATA_QUIRK_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ - ATA_QUIRK_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ - ATA_QUIRK_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ - ATA_QUIRK_NO_FUA = (1 << 30), /* Do not use FUA */ - - /* DMA mask for user DMA control: User visible values; DO NOT - renumber */ + ATA_QUIRK_DIAGNOSTIC = (1U << __ATA_QUIRK_DIAGNOSTIC), + ATA_QUIRK_NODMA = (1U << __ATA_QUIRK_NODMA), + ATA_QUIRK_NONCQ = (1U << __ATA_QUIRK_NONCQ), + ATA_QUIRK_MAX_SEC_128 = (1U << __ATA_QUIRK_MAX_SEC_128), + ATA_QUIRK_BROKEN_HPA = (1U << __ATA_QUIRK_BROKEN_HPA), + ATA_QUIRK_DISABLE = (1U << __ATA_QUIRK_DISABLE), + ATA_QUIRK_HPA_SIZE = (1U << __ATA_QUIRK_HPA_SIZE), + ATA_QUIRK_IVB = (1U << __ATA_QUIRK_IVB), + ATA_QUIRK_STUCK_ERR = (1U << __ATA_QUIRK_STUCK_ERR), + ATA_QUIRK_BRIDGE_OK = (1U << __ATA_QUIRK_BRIDGE_OK), + ATA_QUIRK_ATAPI_MOD16_DMA = (1U << __ATA_QUIRK_ATAPI_MOD16_DMA), + ATA_QUIRK_FIRMWARE_WARN = (1U << __ATA_QUIRK_FIRMWARE_WARN), + ATA_QUIRK_1_5_GBPS = (1U << __ATA_QUIRK_1_5_GBPS), + ATA_QUIRK_NOSETXFER = (1U << __ATA_QUIRK_NOSETXFER), + ATA_QUIRK_BROKEN_FPDMA_AA = (1U << __ATA_QUIRK_BROKEN_FPDMA_AA), + ATA_QUIRK_DUMP_ID = (1U << __ATA_QUIRK_DUMP_ID), + ATA_QUIRK_MAX_SEC_LBA48 = (1U << __ATA_QUIRK_MAX_SEC_LBA48), + ATA_QUIRK_ATAPI_DMADIR = (1U << __ATA_QUIRK_ATAPI_DMADIR), + ATA_QUIRK_NO_NCQ_TRIM = (1U << __ATA_QUIRK_NO_NCQ_TRIM), + ATA_QUIRK_NOLPM = (1U << __ATA_QUIRK_NOLPM), + ATA_QUIRK_WD_BROKEN_LPM = (1U << __ATA_QUIRK_WD_BROKEN_LPM), + ATA_QUIRK_ZERO_AFTER_TRIM = (1U << __ATA_QUIRK_ZERO_AFTER_TRIM), + ATA_QUIRK_NO_DMA_LOG = (1U << __ATA_QUIRK_NO_DMA_LOG), + ATA_QUIRK_NOTRIM = (1U << __ATA_QUIRK_NOTRIM), + ATA_QUIRK_MAX_SEC_1024 = (1U << __ATA_QUIRK_MAX_SEC_1024), + ATA_QUIRK_MAX_TRIM_128M = (1U << __ATA_QUIRK_MAX_TRIM_128M), + ATA_QUIRK_NO_NCQ_ON_ATI = (1U << __ATA_QUIRK_NO_NCQ_ON_ATI), + ATA_QUIRK_NO_ID_DEV_LOG = (1U 
<< __ATA_QUIRK_NO_ID_DEV_LOG), + ATA_QUIRK_NO_LOG_DIR = (1U << __ATA_QUIRK_NO_LOG_DIR), + ATA_QUIRK_NO_FUA = (1U << __ATA_QUIRK_NO_FUA), + + /* User visible DMA mask for DMA control. DO NOT renumber. */ ATA_DMA_MASK_ATA = (1 << 0), /* DMA on ATA Disk */ ATA_DMA_MASK_ATAPI = (1 << 1), /* DMA on ATAPI */ ATA_DMA_MASK_CFA = (1 << 2), /* DMA on CF Card */ -- cgit v1.2.3 From 6774e90f3146818c284b805b16ecdc144dda1c60 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2024 14:05:01 -0700 Subject: of: Add test managed wrappers for of_overlay_apply()/of_node_put() Add test managed wrappers for of_overlay_apply() that automatically removes the overlay when the test is finished. This API is intended for use by KUnit tests that test code which relies on 'struct device_node's and of_*() APIs. KUnit tests will call of_overlay_apply_kunit() to load an overlay that's been built into the kernel image. When the test is complete, the overlay will be removed. This has a few benefits: 1) It keeps the tests hermetic because the overlay is removed when the test is complete. Tests won't even be aware that an overlay was loaded in another test. 2) The overlay code can live right next to the unit test that loads it. The overlay and the unit test can be compiled into one kernel module if desired. 3) We can test different device tree configurations by loading different overlays. The overlays can be written for a specific test, and there can be many of them loaded per-test without needing to jam all possible combinations into one DTB. 4) It also allows KUnit to test device tree dependent code on any architecture, not just UML. This allows KUnit tests to test architecture specific device tree code. There are some potential pitfalls though. Test authors need to be careful to not overwrite properties in the live tree. The easiest way to do this is to add and remove nodes with a 'kunit-' prefix, almost guaranteeing that the same node won't be present in the tree loaded at boot. 
Suggested-by: Rob Herring Cc: Rob Herring Cc: Saravana Kannan Reviewed-by: Rob Herring (Arm) Reviewed-by: David Gow Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240718210513.3801024-3-sboyd@kernel.org --- include/kunit/of.h | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 include/kunit/of.h (limited to 'include') diff --git a/include/kunit/of.h b/include/kunit/of.h new file mode 100644 index 000000000000..48d4e70c9666 --- /dev/null +++ b/include/kunit/of.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KUNIT_OF_H +#define _KUNIT_OF_H + +#include + +struct device_node; + +#ifdef CONFIG_OF + +void of_node_put_kunit(struct kunit *test, struct device_node *node); + +#else + +static inline +void of_node_put_kunit(struct kunit *test, struct device_node *node) +{ + kunit_skip(test, "requires CONFIG_OF"); +} + +#endif /* !CONFIG_OF */ + +#if defined(CONFIG_OF) && defined(CONFIG_OF_OVERLAY) && defined(CONFIG_OF_EARLY_FLATTREE) + +int of_overlay_fdt_apply_kunit(struct kunit *test, void *overlay_fdt, + u32 overlay_fdt_size, int *ovcs_id); +#else + +static inline int +of_overlay_fdt_apply_kunit(struct kunit *test, void *overlay_fdt, + u32 overlay_fdt_size, int *ovcs_id) +{ + kunit_skip(test, "requires CONFIG_OF and CONFIG_OF_OVERLAY and CONFIG_OF_EARLY_FLATTREE for root node"); + return -EINVAL; +} + +#endif + +/** + * __of_overlay_apply_kunit() - Test managed of_overlay_fdt_apply() variant + * @test: test context + * @overlay_begin: start address of overlay to apply + * @overlay_end: end address of overlay to apply + * + * This is mostly internal API. See of_overlay_apply_kunit() for the wrapper + * that makes this easier to use. + * + * Similar to of_overlay_fdt_apply(), except the overlay is managed by the test + * case and is automatically removed with of_overlay_remove() after the test + * case concludes. + * + * Return: 0 on success, negative errno on failure + */ +static inline int __of_overlay_apply_kunit(struct kunit *test, + u8 *overlay_begin, + const u8 *overlay_end) +{ + int unused; + + return of_overlay_fdt_apply_kunit(test, overlay_begin, + overlay_end - overlay_begin, + &unused); +} + +/** + * of_overlay_apply_kunit() - Test managed of_overlay_fdt_apply() for built-in overlays + * @test: test context + * @overlay_name: name of overlay to apply + * + * This macro is used to apply a device tree overlay built with the + * cmd_dt_S_dtbo rule in scripts/Makefile.lib that has been compiled into the + * kernel image or KUnit test module. The overlay is automatically removed when + * the test is finished. + * + * Unit tests that need device tree nodes should compile an overlay file with + * @overlay_name\.dtbo.o in their Makefile along with their unit test and then + * load the overlay during their test. The @overlay_name matches the filename + * of the overlay without the dtbo filename extension. If CONFIG_OF_OVERLAY is + * not enabled, the @test will be skipped. + * + * In the Makefile + * + * .. code-block:: none + * + * obj-$(CONFIG_OF_OVERLAY_KUNIT_TEST) += overlay_test.o kunit_overlay_test.dtbo.o + * + * In the test + * + * .. 
code-block:: c + * + * static void of_overlay_kunit_of_overlay_apply(struct kunit *test) + * { + * struct device_node *np; + * + * KUNIT_ASSERT_EQ(test, 0, + * of_overlay_apply_kunit(test, kunit_overlay_test)); + * + * np = of_find_node_by_name(NULL, "test-kunit"); + * KUNIT_EXPECT_NOT_ERR_OR_NULL(test, np); + * of_node_put(np); + * } + * + * Return: 0 on success, negative errno on failure. + */ +#define of_overlay_apply_kunit(test, overlay_name) \ +({ \ + extern uint8_t __dtbo_##overlay_name##_begin[]; \ + extern uint8_t __dtbo_##overlay_name##_end[]; \ + \ + __of_overlay_apply_kunit((test), \ + __dtbo_##overlay_name##_begin, \ + __dtbo_##overlay_name##_end); \ +}) + +#endif -- cgit v1.2.3 From 5ac79730324c6f37106ce397586020ffe6e8e234 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2024 14:05:04 -0700 Subject: platform: Add test managed platform_device/driver APIs Introduce KUnit resource wrappers around platform_driver_register(), platform_device_alloc(), and platform_device_add() so that test authors can register platform drivers/devices from their tests and have the drivers/devices automatically be unregistered when the test is done. This makes test setup code simpler when a platform driver or platform device is needed. Add a few test cases at the same time to make sure the APIs work as intended. Cc: Brendan Higgins Reviewed-by: David Gow Cc: Rae Moar Reviewed-by: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240718210513.3801024-6-sboyd@kernel.org --- include/kunit/platform_device.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/kunit/platform_device.h (limited to 'include') diff --git a/include/kunit/platform_device.h b/include/kunit/platform_device.h new file mode 100644 index 000000000000..0fc0999d2420 --- /dev/null +++ b/include/kunit/platform_device.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KUNIT_PLATFORM_DRIVER_H +#define _KUNIT_PLATFORM_DRIVER_H + +struct kunit; +struct platform_device; +struct platform_driver; + +struct platform_device * +kunit_platform_device_alloc(struct kunit *test, const char *name, int id); +int kunit_platform_device_add(struct kunit *test, struct platform_device *pdev); + +int kunit_platform_device_prepare_wait_for_probe(struct kunit *test, + struct platform_device *pdev, + struct completion *x); + +int kunit_platform_driver_register(struct kunit *test, + struct platform_driver *drv); + +#endif -- cgit v1.2.3 From d690bd11e87adfc684265209a5aabd1f58fa367e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2024 14:05:05 -0700 Subject: clk: Add test managed clk provider/consumer APIs Unit tests are more ergonomic and simpler to understand if they don't have to hoist a bunch of code into the test harness init and exit functions. Add some test managed wrappers for the clk APIs so that clk unit tests can write more code in the actual test and less code in the harness. Only add APIs that are used for now. More wrappers can be added in the future as necessary. 
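[Editorial aside, not part of the patch] A rough sketch of how these test-managed wrappers are meant to be used from a KUnit test, assuming the declarations added below in <kunit/clk.h>. The dummy clock, the empty clk_ops and the test function itself are hypothetical; only the *_kunit() calls correspond to the API this patch adds, and all cleanup (unregistering the clk_hw, putting the consumer handle, disabling/unpreparing) is left to the KUnit resource machinery:

	#include <kunit/clk.h>
	#include <kunit/test.h>
	#include <linux/clk.h>
	#include <linux/clk-provider.h>
	#include <linux/slab.h>

	/* A do-nothing clock: with no ops set it simply accepts prepare/enable. */
	static const struct clk_ops kunit_dummy_clk_ops = { };

	static void clk_kunit_wrappers_example(struct kunit *test)
	{
		static const struct clk_init_data init = {
			.name = "kunit-dummy-clk",
			.ops = &kunit_dummy_clk_ops,
		};
		struct clk_hw *hw;
		struct clk *clk;

		hw = kunit_kzalloc(test, sizeof(*hw), GFP_KERNEL);
		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, hw);
		hw->init = &init;

		/* Unregistered automatically when the test finishes. */
		KUNIT_ASSERT_EQ(test, 0, clk_hw_register_kunit(test, NULL, hw));

		/* Consumer handle, put automatically when the test finishes. */
		clk = clk_hw_get_clk_kunit(test, hw, NULL);
		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, clk);

		/* Disabled and unprepared automatically on test exit as well. */
		KUNIT_EXPECT_EQ(test, 0, clk_prepare_enable_kunit(test, clk));
	}

The same pattern applies to the OF variants (of_clk_hw_register_kunit(), of_clk_get_kunit()) when the clock under test is described by a device tree overlay loaded with the of_overlay_apply_kunit() helper introduced earlier.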
Cc: Brendan Higgins Cc: David Gow Cc: Rae Moar Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240718210513.3801024-7-sboyd@kernel.org --- include/kunit/clk.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/kunit/clk.h (limited to 'include') diff --git a/include/kunit/clk.h b/include/kunit/clk.h new file mode 100644 index 000000000000..73bc99cefe7b --- /dev/null +++ b/include/kunit/clk.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _CLK_KUNIT_H +#define _CLK_KUNIT_H + +struct clk; +struct clk_hw; +struct device; +struct device_node; +struct kunit; + +struct clk * +clk_get_kunit(struct kunit *test, struct device *dev, const char *con_id); +struct clk * +of_clk_get_kunit(struct kunit *test, struct device_node *np, int index); + +struct clk * +clk_hw_get_clk_kunit(struct kunit *test, struct clk_hw *hw, const char *con_id); +struct clk * +clk_hw_get_clk_prepared_enabled_kunit(struct kunit *test, struct clk_hw *hw, + const char *con_id); + +int clk_prepare_enable_kunit(struct kunit *test, struct clk *clk); + +int clk_hw_register_kunit(struct kunit *test, struct device *dev, struct clk_hw *hw); +int of_clk_hw_register_kunit(struct kunit *test, struct device_node *node, + struct clk_hw *hw); + +#endif -- cgit v1.2.3 From 4e9652003bc39032c3ab79e607aa3d1913c7cf57 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 26 Jul 2024 17:28:15 +0200 Subject: ALSA: control: Annotate snd_kcontrol with __counted_by() struct snd_kcontrol contains a flex array of snd_kcontrol_volatile objects at its end, and the array size is stored in count field. This can be annotated gracefully with __counted_by() for catching possible array overflows. One additional change is the order of the count field initialization; The assignment of the count field is moved before assignment of vd[] elements for avoiding false-positive warnings from compilers. Link: https://patch.msgid.link/20240726152840.8629-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/control.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/control.h b/include/sound/control.h index c1659036c4a7..13511c50825f 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -81,7 +81,7 @@ struct snd_kcontrol { unsigned long private_value; void *private_data; void (*private_free)(struct snd_kcontrol *kcontrol); - struct snd_kcontrol_volatile vd[]; /* volatile data */ + struct snd_kcontrol_volatile vd[] __counted_by(count); /* volatile data */ }; #define snd_kcontrol(n) list_entry(n, struct snd_kcontrol, list) -- cgit v1.2.3 From 0642a3c5cacc0321c755d45ae48f2c84475469a6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Jul 2024 16:13:14 +0200 Subject: ALSA: ump: Update substream name from assigned FB names We had a nice name scheme in ALSA sequencer UMP binding for each sequencer port referring to each assigned Function Block name, while the legacy rawmidi refers only to the UMP Endpoint name. It's better to align both. This patch moves the UMP Group attribute update functions into the core UMP code from the sequencer binding code, and improve the substream name of the legacy rawmidi. 
Link: https://patch.msgid.link/20240729141315.18253-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/sound/ump.h b/include/sound/ump.h index 91238dabe307..7f68056acdff 100644 --- a/include/sound/ump.h +++ b/include/sound/ump.h @@ -13,6 +13,14 @@ struct snd_ump_ops; struct ump_cvt_to_ump; struct snd_seq_ump_ops; +struct snd_ump_group { + int group; /* group index (0-based) */ + unsigned int dir_bits; /* directions */ + bool active; /* activeness */ + bool valid; /* valid group (referred by blocks) */ + char name[64]; /* group name */ +}; + struct snd_ump_endpoint { struct snd_rawmidi core; /* raw UMP access */ @@ -41,6 +49,8 @@ struct snd_ump_endpoint { struct mutex open_mutex; + struct snd_ump_group groups[SNDRV_UMP_MAX_GROUPS]; /* table of groups */ + #if IS_ENABLED(CONFIG_SND_UMP_LEGACY_RAWMIDI) spinlock_t legacy_locks[2]; struct snd_rawmidi *legacy_rmidi; -- cgit v1.2.3 From b6a66e521a2032f7fcba2af5a9bcbaeaa19b7ca3 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 27 Jul 2024 12:01:23 +0200 Subject: mptcp: sched: check both directions for backup The 'mptcp_subflow_context' structure has two items related to the backup flags: - 'backup': the subflow has been marked as backup by the other peer - 'request_bkup': the backup flag has been set by the host Before this patch, the scheduler was only looking at the 'backup' flag. That can make sense in some cases, but it looks like that's not what we wanted for the general use, because either the path-manager was setting both of them when sending an MP_PRIO, or the receiver was duplicating the 'backup' flag in the subflow request. Note that the use of these two flags in the path-manager are going to be fixed in the next commits, but this change here is needed not to modify the behaviour. Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- include/trace/events/mptcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 09e72215b9f9..085b749cdd97 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -34,7 +34,7 @@ TRACE_EVENT(mptcp_subflow_get_send, struct sock *ssk; __entry->active = mptcp_subflow_active(subflow); - __entry->backup = subflow->backup; + __entry->backup = subflow->backup || subflow->request_bkup; if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock)) __entry->free = sk_stream_memory_free(subflow->tcp_sock); -- cgit v1.2.3 From 22f5468731491e53356ba7c028f0fdea20b18e2c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 Jul 2024 10:36:47 -0700 Subject: minmax: improve macro expansion and type checking This clarifies the rules for min()/max()/clamp() type checking and makes them a much more efficient macro expansion. 
In particular, we now look at the type and range of the inputs to see whether they work together, generating a mask of acceptable comparisons, and then just verifying that the inputs have a shared case: - an expression with a signed type can be used for (1) signed comparisons (2) unsigned comparisons if it is statically known to have a non-negative value - an expression with an unsigned type can be used for (3) unsigned comparison (4) signed comparisons if the type is smaller than 'int' and thus the C integer promotion rules will make it signed anyway Here rule (1) and (3) are obvious, and rule (2) is important in order to allow obvious trivial constants to be used together with unsigned values. Rule (4) is not necessarily a good idea, but matches what we used to do, and we have extant cases of this situation in the kernel. Notably with bcachefs having an expression like min(bch2_bucket_sectors_dirty(a), ca->mi.bucket_size) where bch2_bucket_sectors_dirty() returns an 's64', and 'ca->mi.bucket_size' is of type 'u16'. Technically that bcachefs comparison is clearly sensible on a C type level, because the 'u16' will go through the normal C integer promotion, and become 'int', and then we're comparing two signed values and everything looks sane. However, it's not entirely clear that a 'min(s64,u16)' operation makes a lot of conceptual sense, and it's possible that we will remove rule (4). After all, the _reason_ we have these complicated type checks is exactly that the C type promotion rules are not very intuitive. But at least for now the rule is in place for backwards compatibility. Also note that rule (2) existed before, but is hugely relaxed by this commit. It used to be true only for the simplest compile-time non-negative integer constants. The new macro model will allow cases where the compiler can trivially see that an expression is non-negative even if it isn't necessarily a constant. For example, the amdgpu driver does min_t(size_t, sizeof(fru_info->serial), pia[addr] & 0x3F)); because our old 'min()' macro would see that 'pia[addr] & 0x3F' is of type 'int' and clearly not a C constant expression, so doing a 'min()' with a 'size_t' is a signedness violation. Our new 'min()' macro still sees that 'pia[addr] & 0x3F' is of type 'int', but is smart enough to also see that it is clearly non-negative, and thus would allow that case without any complaints. Cc: Arnd Bergmann Cc: David Laight Cc: Lorenzo Stoakes Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 9 ++++++ include/linux/minmax.h | 74 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 68 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2594553bb30b..2df665fa2964 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -296,6 +296,15 @@ static inline void *offset_to_ptr(const int *off) #define is_signed_type(type) (((type)(-1)) < (__force type)1) #define is_unsigned_type(type) (!is_signed_type(type)) +/* + * Useful shorthand for "is this condition known at compile-time?" + * + * Note that the condition may involve non-constant values, + * but the compiler may know enough about the details of the + * values to determine that the condition is statically true. + */ +#define statically_true(x) (__builtin_constant_p(x) && (x)) + /* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. 
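[Editorial aside, not part of the patch] The practical effect of statically_true() on the reworked minmax.h that follows can be illustrated with a few comparisons; the helper below is hypothetical and only stands in for "any int-valued expression":

	#include <linux/minmax.h>

	static unsigned long minmax_example(unsigned long limit)
	{
		int reg = read_some_register();		/* hypothetical */

		(void)min(sizeof(limit), limit);	/* ok: both operands unsigned */
		(void)min(reg & 0x3f, limit);		/* ok now: masked value is provably non-negative */
		/* min(reg, limit) would still hit the signedness check: 'reg' may be negative */
		/* min(-1, limit)  is still rejected: negative constant vs. unsigned */
		return min(limit, 64UL);
	}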
diff --git a/include/linux/minmax.h b/include/linux/minmax.h index e3e4353df983..41da6f85a407 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -26,19 +26,63 @@ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -/* is_signed_type() isn't a constexpr for pointer types */ -#define __is_signed(x) \ - __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ - is_signed_type(typeof(x)), 0) +/* + * __sign_use for integer expressions: + * bit #0 set if ok for unsigned comparisons + * bit #1 set if ok for signed comparisons + * + * In particular, statically non-negative signed integer + * expressions are ok for both. + * + * NOTE! Unsigned types smaller than 'int' are implicitly + * converted to 'int' in expressions, and are accepted for + * signed conversions for now. This is debatable. + * + * Note that 'x' is the original expression, and 'ux' is + * the unique variable that contains the value. + * + * We use 'ux' for pure type checking, and 'x' for when + * we need to look at the value (but without evaluating + * it for side effects! Careful to only ever evaluate it + * with sizeof() or __builtin_constant_p() etc). + * + * Pointers end up being checked by the normal C type + * rules at the actual comparison, and these expressions + * only need to be careful to not cause warnings for + * pointer use. + */ +#define __signed_type_use(x,ux) (2+__is_nonneg(x,ux)) +#define __unsigned_type_use(x,ux) (1+2*(sizeof(ux)<4)) +#define __sign_use(x,ux) (is_signed_type(typeof(ux))? \ + __signed_type_use(x,ux):__unsigned_type_use(x,ux)) + +/* + * To avoid warnings about casting pointers to integers + * of different sizes, we need that special sign type. + * + * On 64-bit we can just always use 'long', since any + * integer or pointer type can just be cast to that. + * + * This does not work for 128-bit signed integers since + * the cast would truncate them, but we do not use s128 + * types in the kernel (we do use 'u128', but they will + * be handled by the !is_signed_type() case). + * + * NOTE! The cast is there only to avoid any warnings + * from when values that aren't signed integer types. 
+ */ +#ifdef CONFIG_64BIT + #define __signed_type(ux) long +#else + #define __signed_type(ux) typeof(__builtin_choose_expr(sizeof(ux)>4,1LL,1L)) +#endif +#define __is_nonneg(x,ux) statically_true((__signed_type(ux))(x)>=0) -/* True for a non-negative signed int constant */ -#define __is_noneg_int(x) \ - (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) +#define __types_ok(x,y,ux,uy) \ + (__sign_use(x,ux) & __sign_use(y,uy)) -#define __types_ok(x, y, ux, uy) \ - (__is_signed(ux) == __is_signed(uy) || \ - __is_signed((ux) + 0) == __is_signed((uy) + 0) || \ - __is_noneg_int(x) || __is_noneg_int(y)) +#define __types_ok3(x,y,z,ux,uy,uz) \ + (__sign_use(x,ux) & __sign_use(y,uy) & __sign_use(z,uz)) #define __cmp_op_min < #define __cmp_op_max > @@ -53,8 +97,8 @@ #define __careful_cmp_once(op, x, y, ux, uy) ({ \ __auto_type ux = (x); __auto_type uy = (y); \ - static_assert(__types_ok(x, y, ux, uy), \ - #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ + BUILD_BUG_ON_MSG(!__types_ok(x,y,ux,uy), \ + #op"("#x", "#y") signedness error"); \ __cmp(op, ux, uy); }) #define __careful_cmp(op, x, y) \ @@ -70,8 +114,8 @@ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ (lo) <= (hi), true), \ "clamp() low limit " #lo " greater than high limit " #hi); \ - static_assert(__types_ok(uval, lo, uval, ulo), "clamp() 'lo' signedness error"); \ - static_assert(__types_ok(uval, hi, uval, uhi), "clamp() 'hi' signedness error"); \ + BUILD_BUG_ON_MSG(!__types_ok3(val,lo,hi,uval,ulo,uhi), \ + "clamp("#val", "#lo", "#hi") signedness error"); \ __clamp(uval, ulo, uhi); }) #define __careful_clamp(val, lo, hi) \ -- cgit v1.2.3 From f2881dfdaaa9ec873dbd383ef5512fc31e576cbb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 29 Jul 2024 11:26:34 +0200 Subject: drm/xe/oa/uapi: Make bit masks unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc-5: In function ‘decode_oa_format.isra.26’, inlined from ‘xe_oa_set_prop_oa_format’ at drivers/gpu/drm/xe/xe_oa.c:1664:6: ././include/linux/compiler_types.h:510:38: error: call to ‘__compiletime_assert_1336’ declared with attribute error: FIELD_GET: mask is not constant [...] ./include/linux/bitfield.h:155:3: note: in expansion of macro ‘__BF_FIELD_CHECK’ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ ^ drivers/gpu/drm/xe/xe_oa.c:1573:18: note: in expansion of macro ‘FIELD_GET’ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); ^ Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240729092634.2227611-1-geert+renesas@glider.be Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 29425d7fdc77..b6fbe4988f2e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1598,10 +1598,10 @@ enum drm_xe_oa_property_id { * b. Counter select c. Counter size and d. BC report. Also refer to the * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. 
*/ -#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) -#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) /** * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit -- cgit v1.2.3 From 1cbe8143fd2f588031a5f157d15ae15ce12215e2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 30 Jul 2024 13:37:33 +0800 Subject: bpf: kprobe: Remove unused declaring of bpf_kprobe_override After the commit 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one"), "bpf_kprobe_override" is not used anywhere anymore, and we can remove it now. Fixes: 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one") Signed-off-by: Menglong Dong Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240730053733.885785-1-dongml2@chinatelecom.cn --- include/linux/trace_events.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df3e2973626..9435185c10ef 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -880,7 +880,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -- cgit v1.2.3 From c149c4a48b19afbf0c383614e57b452d39b154de Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sat, 13 Jul 2024 08:59:16 +0000 Subject: cgroup/cpuset: Remove cpuset_slab_spread_rotor Since the SLAB implementation was removed in v6.8, so the cpuset_slab_spread_rotor is no longer used and can be removed. 
Signed-off-by: Xiu Jianfeng Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 6 ------ include/linux/sched.h | 1 - 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index de4cf0ee96f7..2a6981eeebf8 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -113,7 +113,6 @@ extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); extern int cpuset_mem_spread_node(void); -extern int cpuset_slab_spread_node(void); static inline int cpuset_do_page_mem_spread(void) { @@ -246,11 +245,6 @@ static inline int cpuset_mem_spread_node(void) return 0; } -static inline int cpuset_slab_spread_node(void) -{ - return 0; -} - static inline int cpuset_do_page_mem_spread(void) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index f8d150343d42..3773c1c8f099 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1243,7 +1243,6 @@ struct task_struct { /* Sequence number to catch updates: */ seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; - int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock: */ -- cgit v1.2.3 From 93c8332c8373fee415bd79f08d5ba4ba7ca5ad15 Mon Sep 17 00:00:00 2001 From: Xavier Date: Thu, 4 Jul 2024 14:24:43 +0800 Subject: Union-Find: add a new module in kernel library This patch implements a union-find data structure in the kernel library, which includes operations for allocating nodes, freeing nodes, finding the root of a node, and merging two nodes. Signed-off-by: Xavier Signed-off-by: Tejun Heo --- include/linux/union_find.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/union_find.h (limited to 'include') diff --git a/include/linux/union_find.h b/include/linux/union_find.h new file mode 100644 index 000000000000..cfd49263c138 --- /dev/null +++ b/include/linux/union_find.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_UNION_FIND_H +#define __LINUX_UNION_FIND_H +/** + * union_find.h - union-find data structure implementation + * + * This header provides functions and structures to implement the union-find + * data structure. The union-find data structure is used to manage disjoint + * sets and supports efficient union and find operations. + * + * See Documentation/core-api/union_find.rst for documentation and samples. + */ + +struct uf_node { + struct uf_node *parent; + unsigned int rank; +}; + +/* This macro is used for static initialization of a union-find node. */ +#define UF_INIT_NODE(node) {.parent = &node, .rank = 0} + +/** + * uf_node_init - Initialize a union-find node + * @node: pointer to the union-find node to be initialized + * + * This function sets the parent of the node to itself and + * initializes its rank to 0. 
+ */ +static inline void uf_node_init(struct uf_node *node) +{ + node->parent = node; + node->rank = 0; +} + +/* find the root of a node */ +struct uf_node *uf_find(struct uf_node *node); + +/* Merge two intersecting nodes */ +void uf_union(struct uf_node *node1, struct uf_node *node2); + +#endif /* __LINUX_UNION_FIND_H */ -- cgit v1.2.3 From 89add40066f9ed9abe5f7f886fe5789ff7e0c50e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 29 Jul 2024 16:10:12 -0400 Subject: net: drop bad gso csum_start and offset in virtio_net_hdr Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb for GSO packets. The function already checks that a checksum requested with VIRTIO_NET_HDR_F_NEEDS_CSUM is in skb linear. But for GSO packets this might not hold for segs after segmentation. Syzkaller demonstrated to reach this warning in skb_checksum_help offset = skb_checksum_start_offset(skb); ret = -EINVAL; if (WARN_ON_ONCE(offset >= skb_headlen(skb))) By injecting a TSO packet: WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0 ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774 ip_finish_output_gso net/ipv4/ip_output.c:279 [inline] __ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301 iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4850 [inline] netdev_start_xmit include/linux/netdevice.h:4864 [inline] xmit_one net/core/dev.c:3595 [inline] dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611 __dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261 packet_snd net/packet/af_packet.c:3073 [inline] The geometry of the bad input packet at tcp_gso_segment: [ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0 [ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244 [ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0)) [ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536 ip_summed=3 complete_sw=0 valid=0 level=0) Mitigate with stricter input validation. csum_offset: for GSO packets, deduce the correct value from gso_type. This is already done for USO. Extend it to TSO. Let UFO be: udp[46]_ufo_fragment ignores these fields and always computes the checksum in software. csum_start: finding the real offset requires parsing to the transport header. Do not add a parser, use existing segmentation parsing. Thanks to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded. Again test both TSO and USO. Do not test UFO for the above reason, and do not test UDP tunnel offload. GSO packet are almost always CHECKSUM_PARTIAL. USO packets may be CHECKSUM_NONE since commit 10154dbded6d6 ("udp: Allow GSO transmit from devices with no checksum offload"), but then still these fields are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing. So no need to test for ip_summed == CHECKSUM_PARTIAL first. This revises an existing fix mentioned in the Fixes tag, which broke small packets with GSO offload, as detected by kselftests. 
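[Editorial aside, not part of the patch] Seen from user space, the tightened checks simply require that a GSO packet written to e.g. a tap fd carries a csum_start pointing at the transport header and a csum_offset matching the protocol implied by gso_type. A hedged sketch for a plain Ethernet/IPv4/TCP frame with no options (offsets are illustrative and __virtio16 endianness handling is omitted):

	#include <stddef.h>
	#include <stdint.h>
	#include <linux/if_ether.h>
	#include <linux/ip.h>
	#include <linux/tcp.h>
	#include <linux/virtio_net.h>

	static void fill_tso4_hdr(struct virtio_net_hdr *h, uint16_t mss)
	{
		h->flags       = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		h->gso_type    = VIRTIO_NET_HDR_GSO_TCPV4;
		h->gso_size    = mss;					/* MSS of each segment */
		h->hdr_len     = ETH_HLEN + sizeof(struct iphdr) +
				 sizeof(struct tcphdr);
		h->csum_start  = ETH_HLEN + sizeof(struct iphdr);	/* transport header */
		h->csum_offset = offsetof(struct tcphdr, check);	/* 16 for TCP */
	}

With that, skb->csum_offset matches offsetof(struct tcphdr, check) as required by the new SKB_GSO_TCPV4/TCPV6 case, and a bogus csum_start on a dodgy GSO packet is caught during segmentation instead of triggering the skb_checksum_help() warning quoted above.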
Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871 Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org Fixes: e269d79c7d35 ("net: missing check virtio") Cc: stable@vger.kernel.org Signed-off-by: Willem de Bruijn Link: https://patch.msgid.link/20240729201108.1615114-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index d1d7825318c3..6c395a2600e8 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; - u64 ret, remainder, gso_size; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); - if (hdr->gso_size) { - gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); - ret = div64_u64_rem(skb->len, gso_size, &remainder); - if (!(ret && (hdr->gso_size > needed) && - ((remainder > needed) || (remainder == 0)))) { - return -EINVAL; - } - skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; - } - if (!pskb_may_pull(skb, needed)) return -EINVAL; @@ -182,6 +171,11 @@ retry: if (gso_type != SKB_GSO_UDP_L4) return -EINVAL; break; + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + if (skb->csum_offset != offsetof(struct tcphdr, check)) + return -EINVAL; + break; } /* Kernel has a special handling for GSO_BY_FRAGS. */ -- cgit v1.2.3 From 6dfbafd8a1d51a52a623d912a2219e9982020854 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 Jul 2024 16:08:00 +0200 Subject: soundwire: bus: drop unused driver name field The soundwire driver name field is not currently used by any driver (and even appears to never have been used) so drop it. Signed-off-by: Johan Hovold Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20240712140801.24267-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 94fc1b57c57b..5e0dd47a0412 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -704,8 +704,6 @@ struct sdw_master_device { container_of(d, struct sdw_master_device, dev) struct sdw_driver { - const char *name; - int (*probe)(struct sdw_slave *sdw, const struct sdw_device_id *id); int (*remove)(struct sdw_slave *sdw); -- cgit v1.2.3 From ba5c778cab1dd3e4918f940989e771e2818afee8 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 30 Jun 2024 22:33:44 +0100 Subject: media: rc: remove unused tx_resolution field The tx_resolution field is never read. In theory you can imagine this field being useful for detecting whether the transmitter has the resolution for the message you are trying to send, but I am not aware of any hardware where this could be an issue. Just remove. 
Signed-off-by: Sean Young Signed-off-by: Hans Verkuil --- include/media/rc-core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/media/rc-core.h b/include/media/rc-core.h index 803349599c27..d095908073ef 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -127,7 +127,6 @@ struct lirc_fh { * @min_timeout: minimum timeout supported by device * @max_timeout: maximum timeout supported by device * @rx_resolution : resolution (in us) of input sampler - * @tx_resolution: resolution (in us) of output sampler * @lirc_dev: lirc device * @lirc_cdev: lirc char cdev * @gap_start: start time for gap after timeout if non-zero @@ -194,7 +193,6 @@ struct rc_dev { u32 min_timeout; u32 max_timeout; u32 rx_resolution; - u32 tx_resolution; #ifdef CONFIG_LIRC struct device lirc_dev; struct cdev lirc_cdev; -- cgit v1.2.3 From a755947e050b8751fc5402609a7d600e9c756fa7 Mon Sep 17 00:00:00 2001 From: Mitul Golani Date: Mon, 15 Jul 2024 21:55:13 +0530 Subject: drm/dp: Describe target_rr_divider in struct drm_dp_as_sdp Describe newly added parameter target_rr_divider in struct drm_dp_as_sdp. -v2: Remove extra line from commit message.(Lucas) -v3: Rebase. Fixes: a20c6d954d75 ("drm/dp: Add refresh rate divider to struct representing AS SDP") Cc: Mitul Golani Cc: Arun R Murthy Cc: Suraj Kandpal Cc: Ankit Nautiyal Cc: Jani Nikula Cc: Stephen Rothwell Signed-off-by: Mitul Golani Reviewed-by: Ankit Nautiyal Acked-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240715162514.2836421-1-mitulkumar.ajitkumar.golani@intel.com --- include/drm/display/drm_dp_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index bbb1cdc4fc68..279624833ea9 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -112,6 +112,7 @@ struct drm_dp_vsc_sdp { * @target_rr: Target Refresh * @duration_incr_ms: Successive frame duration increase * @duration_decr_ms: Successive frame duration decrease + * @target_rr_divider: Target refresh rate divider * @mode: Adaptive Sync Operation Mode */ struct drm_dp_as_sdp { -- cgit v1.2.3 From d579b04a52a183db47dfcb7a44304d7747d551e1 Mon Sep 17 00:00:00 2001 From: Yu-Ting Tseng Date: Tue, 9 Jul 2024 00:00:47 -0700 Subject: binder: frozen notification Frozen processes present a significant challenge in binder transactions. When a process is frozen, it cannot, by design, accept and/or respond to binder transactions. As a result, the sender needs to adjust its behavior, such as postponing transactions until the peer process unfreezes. However, there is currently no way to subscribe to these state change events, making it impossible to implement frozen-aware behaviors efficiently. Introduce a binder API for subscribing to frozen state change events. This allows programs to react to changes in peer process state, mitigating issues related to binder transactions sent to frozen processes. Implementation details: For a given binder_ref, the state of frozen notification can be one of the followings: 1. Userspace doesn't want a notification. binder_ref->freeze is null. 2. Userspace wants a notification but none is in flight. list_empty(&binder_ref->freeze->work.entry) = true 3. A notification is in flight and waiting to be read by userspace. binder_ref_freeze.sent is false. 4. A notification was read by userspace and kernel is waiting for an ack. 
binder_ref_freeze.sent is true. When a notification is in flight, new state change events are coalesced into the existing binder_ref_freeze struct. If userspace hasn't picked up the notification yet, the driver simply rewrites the state. Otherwise, the notification is flagged as requiring a resend, which will be performed once userspace acks the original notification that's inflight. See https://r.android.com/3070045 for how userspace is going to use this feature. Signed-off-by: Yu-Ting Tseng Acked-by: Carlos Llamas Link: https://lore.kernel.org/r/20240709070047.4055369-4-yutingtseng@google.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index d44a8118b2ed..1fd92021a573 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -236,6 +236,12 @@ struct binder_frozen_status_info { __u32 async_recv; }; +struct binder_frozen_state_info { + binder_uintptr_t cookie; + __u32 is_frozen; + __u32 reserved; +}; + /* struct binder_extened_error - extended error information * @id: identifier for the failed operation * @command: command as defined by binder_driver_return_protocol @@ -467,6 +473,17 @@ enum binder_driver_return_protocol { /* * The target of the last async transaction is frozen. No parameters. */ + + BR_FROZEN_BINDER = _IOR('r', 21, struct binder_frozen_state_info), + /* + * The cookie and a boolean (is_frozen) that indicates whether the process + * transitioned into a frozen or an unfrozen state. + */ + + BR_CLEAR_FREEZE_NOTIFICATION_DONE = _IOR('r', 22, binder_uintptr_t), + /* + * void *: cookie + */ }; enum binder_driver_command_protocol { @@ -550,6 +567,25 @@ enum binder_driver_command_protocol { /* * binder_transaction_data_sg: the sent command. */ + + BC_REQUEST_FREEZE_NOTIFICATION = + _IOW('c', 19, struct binder_handle_cookie), + /* + * int: handle + * void *: cookie + */ + + BC_CLEAR_FREEZE_NOTIFICATION = _IOW('c', 20, + struct binder_handle_cookie), + /* + * int: handle + * void *: cookie + */ + + BC_FREEZE_NOTIFICATION_DONE = _IOW('c', 21, binder_uintptr_t), + /* + * void *: cookie + */ }; #endif /* _UAPI_LINUX_BINDER_H */ -- cgit v1.2.3 From 990c304930138dcd7a49763417e6e5313b81293e Mon Sep 17 00:00:00 2001 From: Patrick Rohr Date: Mon, 29 Jul 2024 15:00:59 -0700 Subject: Add support for PIO p flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit draft-ietf-6man-pio-pflag is adding a new flag to the Prefix Information Option to signal that the network can allocate a unique IPv6 prefix per client via DHCPv6-PD (see draft-ietf-v6ops-dhcp-pd-per-device). When ra_honor_pio_pflag is enabled, the presence of a P-flag causes SLAAC autoconfiguration to be disabled for that particular PIO. An automated test has been added in Android (r.android.com/3195335) to go along with this change. Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Cc: David Lamparter Cc: Simon Horman Signed-off-by: Patrick Rohr Reviewed-by: Maciej Żenczykowski Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 1 + include/net/addrconf.h | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 383a0ea2ab91..a6e2aadbb91b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -89,6 +89,7 @@ struct ipv6_devconf { __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; __u8 ra_honor_pio_life; + __u8 ra_honor_pio_pflag; struct ctl_table_header *sysctl_header; }; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 62a407db1bf5..b18e81f0c9e1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -37,10 +37,14 @@ struct prefix_info { struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) __u8 onlink : 1, - autoconf : 1, - reserved : 6; + autoconf : 1, + routeraddr : 1, + preferpd : 1, + reserved : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved : 6, + __u8 reserved : 4, + preferpd : 1, + routeraddr : 1, autoconf : 1, onlink : 1; #else -- cgit v1.2.3 From e6ce8a28c768dbbad3f818db286cd0f4c7a921a8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 31 Jul 2024 15:05:22 +0200 Subject: ALSA: ump: Transmit RPN/NRPN message at each MSB/LSB data reception The UMP 1.1 spec says that an RPN/NRPN should be sent when one of the following occurs: * a CC 38 is received * a subsequent CC 6 is received * a CC 98, 99, 100, and 101 is received, indicating the last RPN/NRPN message has ended and a new one has started That said, we should send a partial data even if it's not fully filled. Let's change the UMP conversion helper code to follow that rule. Link: https://patch.msgid.link/20240731130528.12600-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump_convert.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index 28c364c63245..d099ae27f849 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -13,6 +13,7 @@ struct ump_cvt_to_ump_bank { unsigned char cc_nrpn_msb, cc_nrpn_lsb; unsigned char cc_data_msb, cc_data_lsb; unsigned char cc_bank_msb, cc_bank_lsb; + bool cc_data_msb_set, cc_data_lsb_set; }; /* context for converting from MIDI1 byte stream to UMP packet */ -- cgit v1.2.3 From 01ce1bf22adc0d09d906319787091ce784cb9914 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Tue, 23 Jul 2024 11:33:10 -0500 Subject: dt-bindings: clock: exynos850: Add TMU clock Add a constant for TMU PCLK clock. It acts simultaneously as an interface clock (to access TMU registers) and an operating clock which makes TMU IP-core functional. 
Signed-off-by: Sam Protsenko Link: https://lore.kernel.org/r/20240723163311.28654-1-semen.protsenko@linaro.org Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/exynos850.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos850.h b/include/dt-bindings/clock/exynos850.h index 7666241520f8..80dacda57229 100644 --- a/include/dt-bindings/clock/exynos850.h +++ b/include/dt-bindings/clock/exynos850.h @@ -358,6 +358,7 @@ #define CLK_GOUT_UART_PCLK 32 #define CLK_GOUT_WDT0_PCLK 33 #define CLK_GOUT_WDT1_PCLK 34 +#define CLK_GOUT_BUSIF_TMU_PCLK 35 /* CMU_CORE */ #define CLK_MOUT_CORE_BUS_USER 1 -- cgit v1.2.3 From dbaeef363ea54f4c18112874b77503c72ba60fec Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:51 +0300 Subject: drm/dp_mst: Add a helper to queue a topology probe A follow up i915 patch will need to reprobe the MST topology after the initial probing, add a helper for this. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-3-imre.deak@intel.com --- include/drm/display/drm_dp_mst_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index cfe096389d94..02b037d3a93f 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -885,6 +885,8 @@ int drm_dp_check_act_status(struct drm_dp_mst_topology_mgr *mgr); void drm_dp_mst_dump_topology(struct seq_file *m, struct drm_dp_mst_topology_mgr *mgr); +void drm_dp_mst_topology_queue_probe(struct drm_dp_mst_topology_mgr *mgr); + void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr); int __must_check drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, -- cgit v1.2.3 From 9aed3b51fd6186582a95abd9fa67782982540749 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 2 Jul 2024 18:49:35 -0700 Subject: rcu: Better define "atomic" for list replacement The kernel-doc headers for list_replace_rcu() and hlist_replace_rcu() claim that the replacement is atomic, which it is, but only for readers. Avoid confusion by making it clear that the atomic nature of these functions applies only to readers, not to concurrent updaters. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rculist.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 3dc1e58865f7..14dfa6008467 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -191,7 +191,10 @@ static inline void hlist_del_init_rcu(struct hlist_node *n) * @old : the element to be replaced * @new : the new element to insert * - * The @old entry will be replaced with the @new entry atomically. + * The @old entry will be replaced with the @new entry atomically from + * the perspective of concurrent readers. It is the caller's responsibility + * to synchronize with concurrent updaters, if any. + * * Note: @old should not be empty. */ static inline void list_replace_rcu(struct list_head *old, @@ -519,7 +522,9 @@ static inline void hlist_del_rcu(struct hlist_node *n) * @old : the element to be replaced * @new : the new element to insert * - * The @old entry will be replaced with the @new entry atomically. 
+ * The @old entry will be replaced with the @new entry atomically from + * the perspective of concurrent readers. It is the caller's responsibility + * to synchronize with concurrent updaters, if any. */ static inline void hlist_replace_rcu(struct hlist_node *old, struct hlist_node *new) -- cgit v1.2.3 From 21b136cc63d2a9ddd60d4699552b69c214b32964 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 Jul 2024 15:44:16 -0700 Subject: minmax: fix up min3() and max3() too David Laight pointed out that we should deal with the min3() and max3() mess too, which still does excessive expansion. And our current macros are actually rather broken. In particular, the macros did this: #define min3(x, y, z) min((typeof(x))min(x, y), z) #define max3(x, y, z) max((typeof(x))max(x, y), z) and that not only is a nested expansion of possibly very complex arguments with all that involves, the typing with that "typeof()" cast is completely wrong. For example, imagine what happens in max3() if 'x' happens to be a 'unsigned char', but 'y' and 'z' are 'unsigned long'. The types are compatible, and there's no warning - but the result is just random garbage. No, I don't think we've ever hit that issue in practice, but since we now have sane infrastructure for doing this right, let's just use it. It fixes any excessive expansion, and also avoids these kinds of broken type issues. Requested-by: David Laight Acked-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- include/linux/minmax.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 41da6f85a407..98008dd92153 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -152,13 +152,20 @@ #define umax(x, y) \ __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) +#define __careful_op3(op, x, y, z, ux, uy, uz) ({ \ + __auto_type ux = (x); __auto_type uy = (y);__auto_type uz = (z);\ + BUILD_BUG_ON_MSG(!__types_ok3(x,y,z,ux,uy,uz), \ + #op"3("#x", "#y", "#z") signedness error"); \ + __cmp(op, ux, __cmp(op, uy, uz)); }) + /** * min3 - return minimum of three values * @x: first value * @y: second value * @z: third value */ -#define min3(x, y, z) min((typeof(x))min(x, y), z) +#define min3(x, y, z) \ + __careful_op3(min, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * max3 - return maximum of three values @@ -166,7 +173,8 @@ * @y: second value * @z: third value */ -#define max3(x, y, z) max((typeof(x))max(x, y), z) +#define max3(x, y, z) \ + __careful_op3(max, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * min_not_zero - return the minimum that is _not_ zero, unless both are zero -- cgit v1.2.3 From ab03125268679e058e1e7b6612f6d12610761769 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 15 Jul 2024 11:00:34 -0400 Subject: cgroup: Show # of subsystem CSSes in cgroup.stat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cgroup subsystem state (CSS) is an abstraction in the cgroup layer to help manage different structures in various cgroup subsystems by being an embedded element inside a larger structure like cpuset or mem_cgroup. The /proc/cgroups file shows the number of cgroups for each of the subsystems. With cgroup v1, the number of CSSes is the same as the number of cgroups. That is not the case anymore with cgroup v2. The /proc/cgroups file cannot show the actual number of CSSes for the subsystems that are bound to cgroup v2. 
So if a v2 cgroup subsystem is leaking cgroups (usually memory cgroup), we can't tell by looking at /proc/cgroups which cgroup subsystems may be responsible. As cgroup v2 had deprecated the use of /proc/cgroups, the hierarchical cgroup.stat file is now being extended to show the number of live and dying CSSes associated with all the non-inhibited cgroup subsystems that have been bound to cgroup v2. The number includes CSSes in the current cgroup as well as in all the descendants underneath it. This will help us pinpoint which subsystems are responsible for the increasing number of dying (nr_dying_descendants) cgroups. The CSSes dying counts are stored in the cgroup structure itself instead of inside the CSS as suggested by Johannes. This will allow us to accurately track dying counts of cgroup subsystems that have recently been disabled in a cgroup. It is now possible that a zero subsystem number is coupled with a non-zero dying subsystem number. The cgroup-v2.rst file is updated to discuss this new behavior. With this patch applied, a sample output from root cgroup.stat file was shown below. nr_descendants 56 nr_subsys_cpuset 1 nr_subsys_cpu 43 nr_subsys_io 43 nr_subsys_memory 56 nr_subsys_perf_event 57 nr_subsys_hugetlb 1 nr_subsys_pids 56 nr_subsys_rdma 1 nr_subsys_misc 1 nr_dying_descendants 30 nr_dying_subsys_cpuset 0 nr_dying_subsys_cpu 0 nr_dying_subsys_io 0 nr_dying_subsys_memory 30 nr_dying_subsys_perf_event 0 nr_dying_subsys_hugetlb 0 nr_dying_subsys_pids 0 nr_dying_subsys_rdma 0 nr_dying_subsys_misc 0 Another sample output from system.slice/cgroup.stat was: nr_descendants 34 nr_subsys_cpuset 0 nr_subsys_cpu 32 nr_subsys_io 32 nr_subsys_memory 34 nr_subsys_perf_event 35 nr_subsys_hugetlb 0 nr_subsys_pids 34 nr_subsys_rdma 0 nr_subsys_misc 0 nr_dying_descendants 30 nr_dying_subsys_cpuset 0 nr_dying_subsys_cpu 0 nr_dying_subsys_io 0 nr_dying_subsys_memory 30 nr_dying_subsys_perf_event 0 nr_dying_subsys_hugetlb 0 nr_dying_subsys_pids 0 nr_dying_subsys_rdma 0 nr_dying_subsys_misc 0 Note that 'debug' controller wasn't used to provide this information because the controller is not recommended in productions kernels, also many of them won't enable CONFIG_CGROUP_DEBUG by default. Similar information could be retrieved with debuggers like drgn but that's also not always available (e.g. lockdown) and the additional cost of runtime tracking here is deemed marginal. tj: Added Michal's paragraphs on why this is not added the debug controller to the commit message. Signed-off-by: Waiman Long Acked-by: Johannes Weiner Acked-by: Roman Gushchin Reviewed-by: Kamalesh Babulal Cc: Michal Koutný Link: http://lkml.kernel.org/r/20240715150034.2583772-1-longman@redhat.com Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ae04035b6cbe..eb0f6f349496 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -210,6 +210,14 @@ struct cgroup_subsys_state { * fields of the containing structure. */ struct cgroup_subsys_state *parent; + + /* + * Keep track of total numbers of visible descendant CSSes. + * The total number of dying CSSes is tracked in + * css->cgroup->nr_dying_subsys[ssid]. + * Protected by cgroup_mutex. 
+ */ + int nr_descendants; }; /* @@ -470,6 +478,12 @@ struct cgroup { /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; + + /* + * Keep track of total number of dying CSSes at and below this cgroup. + * Protected by cgroup_mutex. + */ + int nr_dying_subsys[CGROUP_SUBSYS_COUNT]; + struct cgroup_root *root; /* -- cgit v1.2.3 From f81489a136ac2c18a7b9bb22cf37c0a6f6d58545 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 12 Jul 2024 13:57:13 -0700 Subject: hwmon: (max6697) Drop platform data support Platform data is not used anywhere in the upstream kernel. Drop support for it to simplify code maintenance. Reviewed-by: Tzung-Bi Shih Signed-off-by: Guenter Roeck --- include/linux/platform_data/max6697.h | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 include/linux/platform_data/max6697.h (limited to 'include') diff --git a/include/linux/platform_data/max6697.h b/include/linux/platform_data/max6697.h deleted file mode 100644 index 6fbb70005541..000000000000 --- a/include/linux/platform_data/max6697.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * max6697.h - * Copyright (c) 2012 Guenter Roeck - */ - -#ifndef MAX6697_H -#define MAX6697_H - -#include - -/* - * For all bit masks: - * bit 0: local temperature - * bit 1..7: remote temperatures - */ -struct max6697_platform_data { - bool smbus_timeout_disable; /* set to disable SMBus timeouts */ - bool extended_range_enable; /* set to enable extended temp range */ - bool beta_compensation; /* set to enable beta compensation */ - u8 alert_mask; /* set bit to 1 to disable alert */ - u8 over_temperature_mask; /* set bit to 1 to disable */ - u8 resistance_cancellation; /* set bit to 0 to disable - * bit mask for MAX6581, - * boolean for other chips - */ - u8 ideality_mask; /* set bit to 0 to disable */ - u8 ideality_value; /* transistor ideality as per - * MAX6581 datasheet - */ -}; - -#endif /* MAX6697_H */ -- cgit v1.2.3 From 298dec19bdeb6e33ac220502504d969272b50cf6 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 31 Jul 2024 00:14:36 -0500 Subject: scx: Allow calling sleepable kfuncs from BPF_PROG_TYPE_SYSCALL We currently only allow calling sleepable scx kfuncs (i.e. scx_bpf_create_dsq()) from BPF_PROG_TYPE_STRUCT_OPS progs. The idea here was that we'd never have to call scx_bpf_create_dsq() outside of a sched_ext struct_ops callback, but that might not actually be true. For example, a scheduler could do something like the following: 1. Open and load (not yet attach) a scheduler skel 2. Synchronously call into a BPF_PROG_TYPE_SYSCALL prog from user space. For example, to initialize an LLC domain, or some other global, read-only state. 3. Attach the skel, which actually enables the scheduler The advantage of doing this is that it can preclude having to do pretty ugly boilerplate like initializing a read-only, statically sized array of u64[]'s which the kernel consumes literally once at init time to then create struct bpf_cpumask objects which are actually queried at runtime. Doing the above is already possible given that we can invoke core BPF kfuncs, such as bpf_cpumask_create(), from BPF_PROG_TYPE_SYSCALL progs. We already allow many scx kfuncs to be called from BPF_PROG_TYPE_SYSCALL progs (e.g. scx_bpf_kick_cpu()). Let's allow the sleepable kfuncs as well.
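For illustration only (not part of this patch), the pre-attach init step in point 2 above could be a minimal SEC("syscall") program along these lines; the program name, context struct and DSQ id are made-up placeholders, and scx/common.bpf.h is assumed to declare the kfunc:

	/* setup.bpf.c -- hypothetical pre-attach init program (sketch) */
	#include <scx/common.bpf.h>	/* assumed to declare scx_bpf_create_dsq() */

	struct init_ctx {
		u64 dsq_id;		/* illustrative: DSQ to create before attach */
	};

	SEC("syscall")
	int setup_dsqs(struct init_ctx *ctx)
	{
		/* sleepable kfunc, now callable from a BPF_PROG_TYPE_SYSCALL prog */
		return scx_bpf_create_dsq(ctx->dsq_id, -1 /* NUMA_NO_NODE */);
	}

User space would run this once via bpf_prog_test_run_opts() between skeleton load and attach.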
Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 593d2f4909dd..26e1c33bc844 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -106,16 +106,14 @@ enum scx_ent_dsq_flags { * mechanism. See scx_kf_allow(). */ enum scx_kf_mask { - SCX_KF_UNLOCKED = 0, /* not sleepable, not rq locked */ - /* all non-sleepables may be nested inside SLEEPABLE */ - SCX_KF_SLEEPABLE = 1 << 0, /* sleepable init operations */ + SCX_KF_UNLOCKED = 0, /* sleepable and not rq locked */ /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */ - SCX_KF_CPU_RELEASE = 1 << 1, /* ops.cpu_release() */ + SCX_KF_CPU_RELEASE = 1 << 0, /* ops.cpu_release() */ /* ops.dequeue (in REST) may be nested inside DISPATCH */ - SCX_KF_DISPATCH = 1 << 2, /* ops.dispatch() */ - SCX_KF_ENQUEUE = 1 << 3, /* ops.enqueue() and ops.select_cpu() */ - SCX_KF_SELECT_CPU = 1 << 4, /* ops.select_cpu() */ - SCX_KF_REST = 1 << 5, /* other rq-locked operations */ + SCX_KF_DISPATCH = 1 << 1, /* ops.dispatch() */ + SCX_KF_ENQUEUE = 1 << 2, /* ops.enqueue() and ops.select_cpu() */ + SCX_KF_SELECT_CPU = 1 << 3, /* ops.select_cpu() */ + SCX_KF_REST = 1 << 4, /* other rq-locked operations */ __SCX_KF_RQ_LOCKED = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH | SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, -- cgit v1.2.3 From be72a57527fde6c80061c5f9d0e28762eb817b03 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Wed, 24 Jul 2024 10:06:58 +0800 Subject: lsm: Refactor return value of LSM hook vm_enough_memory To be consistent with most LSM hooks, convert the return value of hook vm_enough_memory to 0 or a negative error code. Before: - Hook vm_enough_memory returns 1 if permission is granted, 0 if not. - LSM_RET_DEFAULT(vm_enough_memory_mm) is 1. After: - Hook vm_enough_memory returns 0 if permission is granted, negative error code if not. - LSM_RET_DEFAULT(vm_enough_memory_mm) is 0.
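As a rough sketch (not taken from this patch), an LSM hook written against the new convention would look something like the following, with the CAP_SYS_ADMIN check standing in for whatever policy a module actually applies:

	static int example_vm_enough_memory(struct mm_struct *mm, long pages)
	{
		if (capable(CAP_SYS_ADMIN))
			return 0;	/* permission granted */

		return -EPERM;		/* denied: negative error code, no longer 0 */
	}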
Signed-off-by: Xu Kuohai Reviewed-by: Casey Schaufler Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 12c81be78eb9..63e2656d1d56 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -48,7 +48,7 @@ LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) diff --git a/include/linux/security.h b/include/linux/security.h index 1390f1efb4f0..62233fec8ead 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -634,7 +634,7 @@ static inline int security_settime64(const struct timespec64 *ts, static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages)); + return __vm_enough_memory(mm, pages, !cap_vm_enough_memory(mm, pages)); } static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) -- cgit v1.2.3 From 39b5ffc9554366122dbe5a4088a7c8798068ee9f Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 30 Jul 2024 20:24:38 -0700 Subject: dt-bindings: clock: qcom: Add missing USB MP resets The USB multiport controller needs a few missing resets, describe them in the binding. Acked-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240730-sc8180x-usb-mp-v2-1-a7dc4265b553@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-sc8180x.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sc8180x.h b/include/dt-bindings/clock/qcom,gcc-sc8180x.h index 90c6e021a035..487b12c19db5 100644 --- a/include/dt-bindings/clock/qcom,gcc-sc8180x.h +++ b/include/dt-bindings/clock/qcom,gcc-sc8180x.h @@ -294,6 +294,10 @@ #define GCC_VIDEO_AXI0_CLK_BCR 42 #define GCC_VIDEO_AXI1_CLK_BCR 43 #define GCC_USB3_DP_PHY_SEC_BCR 44 +#define GCC_USB3_UNIPHY_MP0_BCR 45 +#define GCC_USB3_UNIPHY_MP1_BCR 46 +#define GCC_USB3UNIPHY_PHY_MP0_BCR 47 +#define GCC_USB3UNIPHY_PHY_MP1_BCR 48 /* GCC GDSCRs */ #define EMAC_GDSC 0 -- cgit v1.2.3 From 44933cd06e34e9838fa0d6c8ed16a3632d49849b Mon Sep 17 00:00:00 2001 From: Satya Priya Kakitapalli Date: Wed, 31 Jul 2024 11:59:14 +0530 Subject: dt-bindings: clock: qcom: Add SM8150 camera clock controller Add device tree bindings for the camera clock controller on Qualcomm SM8150 platform. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Satya Priya Kakitapalli Link: https://lore.kernel.org/r/20240731062916.2680823-7-quic_skakitap@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm8150-camcc.h | 135 ++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm8150-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm8150-camcc.h b/include/dt-bindings/clock/qcom,sm8150-camcc.h new file mode 100644 index 000000000000..5444035efa93 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm8150-camcc.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_SM8150_H +#define _DT_BINDINGS_CLK_QCOM_CAM_CC_SM8150_H + +/* CAM_CC clocks */ +#define CAM_CC_PLL0 0 +#define CAM_CC_PLL0_OUT_EVEN 1 +#define CAM_CC_PLL0_OUT_ODD 2 +#define CAM_CC_PLL1 3 +#define CAM_CC_PLL1_OUT_EVEN 4 +#define CAM_CC_PLL2 5 +#define CAM_CC_PLL2_OUT_MAIN 6 +#define CAM_CC_PLL3 7 +#define CAM_CC_PLL3_OUT_EVEN 8 +#define CAM_CC_PLL4 9 +#define CAM_CC_PLL4_OUT_EVEN 10 +#define CAM_CC_BPS_AHB_CLK 11 +#define CAM_CC_BPS_AREG_CLK 12 +#define CAM_CC_BPS_AXI_CLK 13 +#define CAM_CC_BPS_CLK 14 +#define CAM_CC_BPS_CLK_SRC 15 +#define CAM_CC_CAMNOC_AXI_CLK 16 +#define CAM_CC_CAMNOC_AXI_CLK_SRC 17 +#define CAM_CC_CAMNOC_DCD_XO_CLK 18 +#define CAM_CC_CCI_0_CLK 19 +#define CAM_CC_CCI_0_CLK_SRC 20 +#define CAM_CC_CCI_1_CLK 21 +#define CAM_CC_CCI_1_CLK_SRC 22 +#define CAM_CC_CORE_AHB_CLK 23 +#define CAM_CC_CPAS_AHB_CLK 24 +#define CAM_CC_CPHY_RX_CLK_SRC 25 +#define CAM_CC_CSI0PHYTIMER_CLK 26 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 27 +#define CAM_CC_CSI1PHYTIMER_CLK 28 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 29 +#define CAM_CC_CSI2PHYTIMER_CLK 30 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 31 +#define CAM_CC_CSI3PHYTIMER_CLK 32 +#define CAM_CC_CSI3PHYTIMER_CLK_SRC 33 +#define CAM_CC_CSIPHY0_CLK 34 +#define CAM_CC_CSIPHY1_CLK 35 +#define CAM_CC_CSIPHY2_CLK 36 +#define CAM_CC_CSIPHY3_CLK 37 +#define CAM_CC_FAST_AHB_CLK_SRC 38 +#define CAM_CC_FD_CORE_CLK 39 +#define CAM_CC_FD_CORE_CLK_SRC 40 +#define CAM_CC_FD_CORE_UAR_CLK 41 +#define CAM_CC_GDSC_CLK 42 +#define CAM_CC_ICP_AHB_CLK 43 +#define CAM_CC_ICP_CLK 44 +#define CAM_CC_ICP_CLK_SRC 45 +#define CAM_CC_IFE_0_AXI_CLK 46 +#define CAM_CC_IFE_0_CLK 47 +#define CAM_CC_IFE_0_CLK_SRC 48 +#define CAM_CC_IFE_0_CPHY_RX_CLK 49 +#define CAM_CC_IFE_0_CSID_CLK 50 +#define CAM_CC_IFE_0_CSID_CLK_SRC 51 +#define CAM_CC_IFE_0_DSP_CLK 52 +#define CAM_CC_IFE_1_AXI_CLK 53 +#define CAM_CC_IFE_1_CLK 54 +#define CAM_CC_IFE_1_CLK_SRC 55 +#define CAM_CC_IFE_1_CPHY_RX_CLK 56 +#define CAM_CC_IFE_1_CSID_CLK 57 +#define CAM_CC_IFE_1_CSID_CLK_SRC 58 +#define CAM_CC_IFE_1_DSP_CLK 59 +#define CAM_CC_IFE_LITE_0_CLK 60 +#define CAM_CC_IFE_LITE_0_CLK_SRC 61 +#define CAM_CC_IFE_LITE_0_CPHY_RX_CLK 62 +#define CAM_CC_IFE_LITE_0_CSID_CLK 63 +#define CAM_CC_IFE_LITE_0_CSID_CLK_SRC 64 +#define CAM_CC_IFE_LITE_1_CLK 65 +#define CAM_CC_IFE_LITE_1_CLK_SRC 66 +#define CAM_CC_IFE_LITE_1_CPHY_RX_CLK 67 +#define CAM_CC_IFE_LITE_1_CSID_CLK 68 +#define CAM_CC_IFE_LITE_1_CSID_CLK_SRC 69 +#define CAM_CC_IPE_0_AHB_CLK 70 +#define CAM_CC_IPE_0_AREG_CLK 71 +#define CAM_CC_IPE_0_AXI_CLK 72 +#define CAM_CC_IPE_0_CLK 73 +#define CAM_CC_IPE_0_CLK_SRC 74 +#define CAM_CC_IPE_1_AHB_CLK 75 +#define CAM_CC_IPE_1_AREG_CLK 76 +#define CAM_CC_IPE_1_AXI_CLK 77 +#define CAM_CC_IPE_1_CLK 78 +#define 
CAM_CC_JPEG_CLK 79 +#define CAM_CC_JPEG_CLK_SRC 80 +#define CAM_CC_LRME_CLK 81 +#define CAM_CC_LRME_CLK_SRC 82 +#define CAM_CC_MCLK0_CLK 83 +#define CAM_CC_MCLK0_CLK_SRC 84 +#define CAM_CC_MCLK1_CLK 85 +#define CAM_CC_MCLK1_CLK_SRC 86 +#define CAM_CC_MCLK2_CLK 87 +#define CAM_CC_MCLK2_CLK_SRC 88 +#define CAM_CC_MCLK3_CLK 89 +#define CAM_CC_MCLK3_CLK_SRC 90 +#define CAM_CC_SLOW_AHB_CLK_SRC 91 + +/* CAM_CC power domains */ +#define TITAN_TOP_GDSC 0 +#define BPS_GDSC 1 +#define IFE_0_GDSC 2 +#define IFE_1_GDSC 3 +#define IPE_0_GDSC 4 +#define IPE_1_GDSC 5 + +/* CAM_CC resets */ +#define CAM_CC_BPS_BCR 0 +#define CAM_CC_CAMNOC_BCR 1 +#define CAM_CC_CCI_BCR 2 +#define CAM_CC_CPAS_BCR 3 +#define CAM_CC_CSI0PHY_BCR 4 +#define CAM_CC_CSI1PHY_BCR 5 +#define CAM_CC_CSI2PHY_BCR 6 +#define CAM_CC_CSI3PHY_BCR 7 +#define CAM_CC_FD_BCR 8 +#define CAM_CC_ICP_BCR 9 +#define CAM_CC_IFE_0_BCR 10 +#define CAM_CC_IFE_1_BCR 11 +#define CAM_CC_IFE_LITE_0_BCR 12 +#define CAM_CC_IFE_LITE_1_BCR 13 +#define CAM_CC_IPE_0_BCR 14 +#define CAM_CC_IPE_1_BCR 15 +#define CAM_CC_JPEG_BCR 16 +#define CAM_CC_LRME_BCR 17 +#define CAM_CC_MCLK0_BCR 18 +#define CAM_CC_MCLK1_BCR 19 +#define CAM_CC_MCLK2_BCR 20 +#define CAM_CC_MCLK3_BCR 21 + +#endif -- cgit v1.2.3 From 99447ef003d199329c0a1890f54958fcd10f7063 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 17 Jul 2024 13:04:34 +0300 Subject: dt-bindings: clock: qcom,sm8650-dispcc: replace with symlink The display clock controller indices for SM8650 and SM8550 are completely equal. Replace the header file for qcom,sm8650-dispcc with the symlink to the qcom,sm8550-dispcc header file. Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240717-dispcc-sm8550-fixes-v2-7-5c4a3128c40b@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm8650-dispcc.h | 103 +------------------------ 1 file changed, 1 insertion(+), 102 deletions(-) mode change 100644 => 120000 include/dt-bindings/clock/qcom,sm8650-dispcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm8650-dispcc.h b/include/dt-bindings/clock/qcom,sm8650-dispcc.h deleted file mode 100644 index b0a668b395a5..000000000000 --- a/include/dt-bindings/clock/qcom,sm8650-dispcc.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ -/* - * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved - * Copyright (c) 2023, Linaro Ltd. 
- */ - -#ifndef _DT_BINDINGS_CLK_QCOM_SM8650_DISP_CC_H -#define _DT_BINDINGS_CLK_QCOM_SM8650_DISP_CC_H - -/* DISP_CC clocks */ -#define DISP_CC_MDSS_ACCU_CLK 0 -#define DISP_CC_MDSS_AHB1_CLK 1 -#define DISP_CC_MDSS_AHB_CLK 2 -#define DISP_CC_MDSS_AHB_CLK_SRC 3 -#define DISP_CC_MDSS_BYTE0_CLK 4 -#define DISP_CC_MDSS_BYTE0_CLK_SRC 5 -#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 6 -#define DISP_CC_MDSS_BYTE0_INTF_CLK 7 -#define DISP_CC_MDSS_BYTE1_CLK 8 -#define DISP_CC_MDSS_BYTE1_CLK_SRC 9 -#define DISP_CC_MDSS_BYTE1_DIV_CLK_SRC 10 -#define DISP_CC_MDSS_BYTE1_INTF_CLK 11 -#define DISP_CC_MDSS_DPTX0_AUX_CLK 12 -#define DISP_CC_MDSS_DPTX0_AUX_CLK_SRC 13 -#define DISP_CC_MDSS_DPTX0_CRYPTO_CLK 14 -#define DISP_CC_MDSS_DPTX0_LINK_CLK 15 -#define DISP_CC_MDSS_DPTX0_LINK_CLK_SRC 16 -#define DISP_CC_MDSS_DPTX0_LINK_DIV_CLK_SRC 17 -#define DISP_CC_MDSS_DPTX0_LINK_INTF_CLK 18 -#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK 19 -#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC 20 -#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK 21 -#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK_SRC 22 -#define DISP_CC_MDSS_DPTX0_USB_ROUTER_LINK_INTF_CLK 23 -#define DISP_CC_MDSS_DPTX1_AUX_CLK 24 -#define DISP_CC_MDSS_DPTX1_AUX_CLK_SRC 25 -#define DISP_CC_MDSS_DPTX1_CRYPTO_CLK 26 -#define DISP_CC_MDSS_DPTX1_LINK_CLK 27 -#define DISP_CC_MDSS_DPTX1_LINK_CLK_SRC 28 -#define DISP_CC_MDSS_DPTX1_LINK_DIV_CLK_SRC 29 -#define DISP_CC_MDSS_DPTX1_LINK_INTF_CLK 30 -#define DISP_CC_MDSS_DPTX1_PIXEL0_CLK 31 -#define DISP_CC_MDSS_DPTX1_PIXEL0_CLK_SRC 32 -#define DISP_CC_MDSS_DPTX1_PIXEL1_CLK 33 -#define DISP_CC_MDSS_DPTX1_PIXEL1_CLK_SRC 34 -#define DISP_CC_MDSS_DPTX1_USB_ROUTER_LINK_INTF_CLK 35 -#define DISP_CC_MDSS_DPTX2_AUX_CLK 36 -#define DISP_CC_MDSS_DPTX2_AUX_CLK_SRC 37 -#define DISP_CC_MDSS_DPTX2_CRYPTO_CLK 38 -#define DISP_CC_MDSS_DPTX2_LINK_CLK 39 -#define DISP_CC_MDSS_DPTX2_LINK_CLK_SRC 40 -#define DISP_CC_MDSS_DPTX2_LINK_DIV_CLK_SRC 41 -#define DISP_CC_MDSS_DPTX2_LINK_INTF_CLK 42 -#define DISP_CC_MDSS_DPTX2_PIXEL0_CLK 43 -#define DISP_CC_MDSS_DPTX2_PIXEL0_CLK_SRC 44 -#define DISP_CC_MDSS_DPTX2_PIXEL1_CLK 45 -#define DISP_CC_MDSS_DPTX2_PIXEL1_CLK_SRC 46 -#define DISP_CC_MDSS_DPTX3_AUX_CLK 47 -#define DISP_CC_MDSS_DPTX3_AUX_CLK_SRC 48 -#define DISP_CC_MDSS_DPTX3_CRYPTO_CLK 49 -#define DISP_CC_MDSS_DPTX3_LINK_CLK 50 -#define DISP_CC_MDSS_DPTX3_LINK_CLK_SRC 51 -#define DISP_CC_MDSS_DPTX3_LINK_DIV_CLK_SRC 52 -#define DISP_CC_MDSS_DPTX3_LINK_INTF_CLK 53 -#define DISP_CC_MDSS_DPTX3_PIXEL0_CLK 54 -#define DISP_CC_MDSS_DPTX3_PIXEL0_CLK_SRC 55 -#define DISP_CC_MDSS_ESC0_CLK 56 -#define DISP_CC_MDSS_ESC0_CLK_SRC 57 -#define DISP_CC_MDSS_ESC1_CLK 58 -#define DISP_CC_MDSS_ESC1_CLK_SRC 59 -#define DISP_CC_MDSS_MDP1_CLK 60 -#define DISP_CC_MDSS_MDP_CLK 61 -#define DISP_CC_MDSS_MDP_CLK_SRC 62 -#define DISP_CC_MDSS_MDP_LUT1_CLK 63 -#define DISP_CC_MDSS_MDP_LUT_CLK 64 -#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 65 -#define DISP_CC_MDSS_PCLK0_CLK 66 -#define DISP_CC_MDSS_PCLK0_CLK_SRC 67 -#define DISP_CC_MDSS_PCLK1_CLK 68 -#define DISP_CC_MDSS_PCLK1_CLK_SRC 69 -#define DISP_CC_MDSS_RSCC_AHB_CLK 70 -#define DISP_CC_MDSS_RSCC_VSYNC_CLK 71 -#define DISP_CC_MDSS_VSYNC1_CLK 72 -#define DISP_CC_MDSS_VSYNC_CLK 73 -#define DISP_CC_MDSS_VSYNC_CLK_SRC 74 -#define DISP_CC_PLL0 75 -#define DISP_CC_PLL1 76 -#define DISP_CC_SLEEP_CLK 77 -#define DISP_CC_SLEEP_CLK_SRC 78 -#define DISP_CC_XO_CLK 79 -#define DISP_CC_XO_CLK_SRC 80 - -/* DISP_CC resets */ -#define DISP_CC_MDSS_CORE_BCR 0 -#define DISP_CC_MDSS_CORE_INT2_BCR 1 -#define DISP_CC_MDSS_RSCC_BCR 2 - -/* DISP_CC GDSCR */ -#define MDSS_GDSC 0 -#define 
MDSS_INT2_GDSC 1 - -#endif diff --git a/include/dt-bindings/clock/qcom,sm8650-dispcc.h b/include/dt-bindings/clock/qcom,sm8650-dispcc.h new file mode 120000 index 000000000000..c0a291188f28 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm8650-dispcc.h @@ -0,0 +1 @@ +qcom,sm8550-dispcc.h \ No newline at end of file -- cgit v1.2.3 From c9c0ee5f20c593faf289fa8850c3ed84031dd12a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 29 Jul 2024 03:47:40 -0700 Subject: net: skbuff: Skip early return in skb_unref when debugging This patch modifies the skb_unref function to skip the early return optimization when CONFIG_DEBUG_NET is enabled. The change ensures that the reference count decrement always occurs in debug builds, allowing for more thorough checking of SKB reference counting. Previously, when the SKB's reference count was 1 and CONFIG_DEBUG_NET was not set, the function would return early after a memory barrier (smp_rmb()) without decrementing the reference count. This optimization assumes it's safe to proceed with freeing the SKB without the overhead of an atomic decrement from 1 to 0. With this change: - In non-debug builds (CONFIG_DEBUG_NET not set), behavior remains unchanged, preserving the performance optimization. - In debug builds (CONFIG_DEBUG_NET set), the reference count is always decremented, even when it's 1, allowing for consistent behavior and potentially catching subtle SKB management bugs. This modification enhances debugging capabilities for networking code without impacting performance in production kernels. It helps kernel developers identify and diagnose issues related to SKB management and reference counting in the network stack. Cc: Chris Mason Suggested-by: Jakub Kicinski Signed-off-by: Breno Leitao Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240729104741.370327-1-leitao@debian.org Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 29c3ea5b6e93..cf8f6ce06742 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1225,7 +1225,7 @@ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; - if (likely(refcount_read(&skb->users) == 1)) + if (!IS_ENABLED(CONFIG_DEBUG_NET) && likely(refcount_read(&skb->users) == 1)) smp_rmb(); else if (likely(!refcount_dec_and_test(&skb->users))) return false; -- cgit v1.2.3 From e469e2045f1b78418f39a2461a9fdf73c2789a1a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 1 Aug 2024 08:48:06 +0200 Subject: ALSA: memalloc: Let IOMMU handle S/G primarily The recent changes in IOMMU made the non-contiguous page allocations as default, hence we can simply use the standard DMA allocation for the S/G pages as well. In this patch, we simplify the code by trying the standard DMA allocation at first, instead of dma_alloc_noncontiguous(). For the case without IOMMU, we still need to manage the S/G pages manually, so we keep the same fallback routines like before. The fallback types (SNDRV_DMA_TYPE_DEV_SG_FALLBACK & co) are dropped / folded into SNDRV_DMA_TYPE_DEV_SG and co now. The allocation via the standard DMA call overrides the type accordingly, hence we don't have to have extra fallback types any longer. OTOH, SNDRV_DMA_TYPE_DEV_SG is no longer an alias but became its own type back again. Note that this patch requires another prerequisite fix for memmalloc helper to use the DMA API for WC pages on x86. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219087 Link: https://patch.msgid.link/20240801064808.31205-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/memalloc.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h index 43d524580bd2..9dd475cf4e8c 100644 --- a/include/sound/memalloc.h +++ b/include/sound/memalloc.h @@ -42,17 +42,12 @@ struct snd_dma_device { #define SNDRV_DMA_TYPE_NONCONTIG 8 /* non-coherent SG buffer */ #define SNDRV_DMA_TYPE_NONCOHERENT 9 /* non-coherent buffer */ #ifdef CONFIG_SND_DMA_SGBUF -#define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_NONCONTIG +#define SNDRV_DMA_TYPE_DEV_SG 3 /* S/G pages */ #define SNDRV_DMA_TYPE_DEV_WC_SG 6 /* SG write-combined */ #else #define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ #define SNDRV_DMA_TYPE_DEV_WC_SG SNDRV_DMA_TYPE_DEV_WC #endif -/* fallback types, don't use those directly */ -#ifdef CONFIG_SND_DMA_SGBUF -#define SNDRV_DMA_TYPE_DEV_SG_FALLBACK 10 -#define SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK 11 -#endif /* * info for buffer allocation -- cgit v1.2.3 From 469b77e421b92ce4662f6f1a47f1e7af9dbd14bd Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 30 Jul 2024 02:05:12 +0000 Subject: ALSA: trace: use snd_pcm_direction_name() We already have snd_pcm_direction_name(). Let's use it. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87sevrk52f.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Takashi Iwai --- include/trace/events/asoc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h index 202fc3680c36..6696dbcc2b96 100644 --- a/include/trace/events/asoc.h +++ b/include/trace/events/asoc.h @@ -8,6 +8,7 @@ #include #include #include +#include #define DAPM_DIRECT "(direct)" #define DAPM_ARROW(dir) (((dir) == SND_SOC_DAPM_DIR_OUT) ? "->" : "<-") @@ -212,7 +213,7 @@ TRACE_EVENT(snd_soc_dapm_connected, ), TP_printk("%s: found %d paths", - __entry->stream ? "capture" : "playback", __entry->paths) + snd_pcm_direction_name(__entry->stream), __entry->paths) ); TRACE_EVENT(snd_soc_jack_irq, -- cgit v1.2.3 From d39388e6555cb805beb985a7ece1c771c611037c Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:22 +0530 Subject: ASoC: intel/sdw-utils: move soundwire machine driver soc ops Move Intel SoundWire generic machine driver soc ops to common place so that it can be used by other platform machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-8-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/sound/soc_sdw_utils.h (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h new file mode 100644 index 000000000000..cf4cdb66b2de --- /dev/null +++ b/include/sound/soc_sdw_utils.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * This file incorporates work covered by the following copyright notice: + * Copyright (c) 2020 Intel Corporation + * Copyright(c) 2024 Advanced Micro Devices, Inc. 
+ * + */ + +#ifndef SOC_SDW_UTILS_H +#define SOC_SDW_UTILS_H + +#include + +int asoc_sdw_startup(struct snd_pcm_substream *substream); +int asoc_sdw_prepare(struct snd_pcm_substream *substream); +int asoc_sdw_prepare(struct snd_pcm_substream *substream); +int asoc_sdw_trigger(struct snd_pcm_substream *substream, int cmd); +int asoc_sdw_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params); +int asoc_sdw_hw_free(struct snd_pcm_substream *substream); +void asoc_sdw_shutdown(struct snd_pcm_substream *substream); + +#endif -- cgit v1.2.3 From 73619137c633a89a90f8c8c5fa59171aaff6e913 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:23 +0530 Subject: ASoC: intel: move soundwire machine driver common structures Move intel generic SoundWire machine driver common structures to soc_sdw_utils.h file. These structures will be used in other platform SoundWire machine driver code. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-9-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index cf4cdb66b2de..1ae5523bbcf8 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -11,6 +11,49 @@ #include +#define SOC_SDW_MAX_DAI_NUM 8 + +struct asoc_sdw_codec_info; + +struct asoc_sdw_dai_info { + const bool direction[2]; /* playback & capture support */ + const char *dai_name; + const int dai_type; + const int dailink[2]; /* dailink id for each direction */ + const struct snd_kcontrol_new *controls; + const int num_controls; + const struct snd_soc_dapm_widget *widgets; + const int num_widgets; + int (*init)(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + int (*exit)(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); + int (*rtd_init)(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); + bool rtd_init_done; /* Indicate that the rtd_init callback is done */ + unsigned long quirk; +}; + +struct asoc_sdw_codec_info { + const int part_id; + const int version_id; + const char *codec_name; + int amp_num; + const u8 acpi_id[ACPI_ID_LEN]; + const bool ignore_internal_dmic; + const struct snd_soc_ops *ops; + struct asoc_sdw_dai_info dais[SOC_SDW_MAX_DAI_NUM]; + const int dai_num; + + int (*codec_card_late_probe)(struct snd_soc_card *card); + + int (*count_sidecar)(struct snd_soc_card *card, + int *num_dais, int *num_devs); + int (*add_sidecar)(struct snd_soc_card *card, + struct snd_soc_dai_link **dai_links, + struct snd_soc_codec_conf **codec_conf); +}; + int asoc_sdw_startup(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); -- cgit v1.2.3 From 941d6933eb31b13d09a7293bc92d9d494513a372 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:24 +0530 Subject: ASoC: intel/sdw_utils: move soundwire machine driver helper functions Move below Intel SoundWire machine driver helper functions to soc_sdw_utils.c file so that it can be used by other platform machine driver. 
- asoc_sdw_is_unique_device() - asoc_sdw_get_codec_name() Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-10-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 1ae5523bbcf8..7ca3a6afdfb1 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -10,6 +10,7 @@ #define SOC_SDW_UTILS_H #include +#include #define SOC_SDW_MAX_DAI_NUM 8 @@ -63,4 +64,8 @@ int asoc_sdw_hw_params(struct snd_pcm_substream *substream, int asoc_sdw_hw_free(struct snd_pcm_substream *substream); void asoc_sdw_shutdown(struct snd_pcm_substream *substream); +const char *asoc_sdw_get_codec_name(struct device *dev, + const struct asoc_sdw_codec_info *codec_info, + const struct snd_soc_acpi_link_adr *adr_link, + int adr_index); #endif -- cgit v1.2.3 From 4776d0c9088634677749e42ae8b36b4da46226db Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:25 +0530 Subject: ASoC: intel/sdw_utils: move dmic codec helper function Move generic dmic codec helper function implementation to sdw_utils folder so that this function can be used by other platform machine drivers. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-11-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 7ca3a6afdfb1..0ffbd9847532 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -68,4 +68,8 @@ const char *asoc_sdw_get_codec_name(struct device *dev, const struct asoc_sdw_codec_info *codec_info, const struct snd_soc_acpi_link_adr *adr_link, int adr_index); + +/* DMIC support */ +int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); + #endif -- cgit v1.2.3 From a9831fd1c0e6353366bbde6e1dd7b15a2dd6d825 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:26 +0530 Subject: ASoC: intel/sdw_utils: move rtk dmic helper functions Move rtk SoundWire dmic helper functions implementation to sdw_utils folder to make it generic. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-12-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 0ffbd9847532..9fa102fc03c3 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -72,4 +72,7 @@ const char *asoc_sdw_get_codec_name(struct device *dev, /* DMIC support */ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); +/* dai_link init callbacks */ +int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); + #endif -- cgit v1.2.3 From 09c60bc9da91f188e2aedb0bac94d3e129fa20f9 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:27 +0530 Subject: ASoC: intel/sdw_utils: move rt712 sdca helper functions Move RT712 SDCA codec helper file to sdw_utils folder so that these helper functions can be used by other platform machine drivers. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-13-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 9fa102fc03c3..6fd305253e2a 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -74,5 +74,6 @@ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From 89b3456e9afa4c61879f6d744f6d2c6fadc2b2fc Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:28 +0530 Subject: ASoC: intel/sdw_utils: move rt722 sdca helper functions Move RT722 SDCA codec helper file to sdw_utils folder to make it generic. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-14-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 6fd305253e2a..5bc2a89bced4 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -75,5 +75,6 @@ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From 4f54856b4ea460e9048c11e3f698c38fb072529d Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:29 +0530 Subject: ASoC: intel: split soundwire machine driver private data Split intel generic SoundWire machine driver private data into two structures. 
One structure is generic one which can be used by other platform machine driver and the other one is intel specific one. Move generic machine driver private data to soc_sdw_utils.h. Define a void pointer in generic machine driver private data structure and assign the vendor specific structure in mc_probe() call. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-15-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 5bc2a89bced4..eb713cdf4079 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -55,6 +55,16 @@ struct asoc_sdw_codec_info { struct snd_soc_codec_conf **codec_conf); }; +struct asoc_sdw_mc_private { + struct snd_soc_card card; + struct snd_soc_jack sdw_headset; + struct device *headset_codec_dev; /* only one headset per card */ + struct device *amp_dev1, *amp_dev2; + bool append_dai_type; + bool ignore_internal_dmic; + void *private; +}; + int asoc_sdw_startup(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); -- cgit v1.2.3 From 139e17740200d8e92677942bfee6c8cfe4da3009 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:30 +0530 Subject: ASoC: intel/sdw_utils: move rt5682 codec helper function Move rt5682 sdw codec helper function to common place holder to make it generic. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-16-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index eb713cdf4079..ed97d78336da 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -86,5 +86,6 @@ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt5682_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From da5b1831673208e235cadc9107207adb092c2eb9 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:31 +0530 Subject: ASoC: intel/sdw_utils: move rtk jack common helper functions Move RTK codec jack common helper functions to common place holder (sdw_utils folder) to make it generic so that it will be used by other platform machine driver code. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-17-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index ed97d78336da..6b6bab8d3310 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -13,6 +13,8 @@ #include #define SOC_SDW_MAX_DAI_NUM 8 +#define SOC_SDW_MAX_NO_PROPS 2 +#define SOC_SDW_JACK_JDSRC(quirk) ((quirk) & GENMASK(3, 0)) struct asoc_sdw_codec_info; @@ -63,6 +65,7 @@ struct asoc_sdw_mc_private { bool append_dai_type; bool ignore_internal_dmic; void *private; + unsigned long mc_quirk; }; int asoc_sdw_startup(struct snd_pcm_substream *substream); @@ -82,8 +85,16 @@ const char *asoc_sdw_get_codec_name(struct device *dev, /* DMIC support */ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); +/* RT711-SDCA support */ +int asoc_sdw_rt_sdca_jack_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); +int asoc_sdw_rt_sdca_jack_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); + /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt5682_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); -- cgit v1.2.3 From 8e84fd22dc425fd0b005a20156eeb67f019ae39f Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:32 +0530 Subject: ASoC: intel/sdw_utils: move rt700 and rt711 codec helper functions Move RT700 and RT711 Soundwire codec helper functions to common place holder so that it can be used by other platform machine driver. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-18-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 6b6bab8d3310..3e55cac33176 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -85,6 +85,13 @@ const char *asoc_sdw_get_codec_name(struct device *dev, /* DMIC support */ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); +/* RT711 support */ +int asoc_sdw_rt711_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); +int asoc_sdw_rt711_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); + /* RT711-SDCA support */ int asoc_sdw_rt_sdca_jack_init(struct snd_soc_card *card, struct snd_soc_dai_link *dai_links, @@ -95,6 +102,8 @@ int asoc_sdw_rt_sdca_jack_exit(struct snd_soc_card *card, struct snd_soc_dai_lin /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt700_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt711_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt5682_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); -- cgit v1.2.3 From ccc96ae2814a7faad591af68bd0115e4d2b256b4 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:33 +0530 Subject: ASoC: intel/sdw_utils: move rtk amp codec helper functions Move RTK amp codec helper functions related implementation to common place holder to make it generic so that these helper functions will be used by other platform machine driver modules. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-19-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 3e55cac33176..450542eb3ea0 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -99,9 +99,20 @@ int asoc_sdw_rt_sdca_jack_init(struct snd_soc_card *card, bool playback); int asoc_sdw_rt_sdca_jack_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); +/* RT1308 I2S support */ +extern const struct snd_soc_ops soc_sdw_rt1308_i2s_ops; + +/* generic amp support */ +int asoc_sdw_rt_amp_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); +int asoc_sdw_rt_amp_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); + /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt_amp_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt700_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt711_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); -- cgit v1.2.3 From 5fa46627d5118bf58b80df45489f49e1e316473a Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:34 +0530 Subject: ASoC: intel/sdw_utils: move cirrus soundwire codec helper functions To make it generic, move Cirrus Soundwire codec helper functions to common place holder so that it can be used by other platform machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-20-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 450542eb3ea0..d5dd887b9d15 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -16,6 +16,19 @@ #define SOC_SDW_MAX_NO_PROPS 2 #define SOC_SDW_JACK_JDSRC(quirk) ((quirk) & GENMASK(3, 0)) +/* If a CODEC has an optional speaker output, this quirk will enable it */ +#define SOC_SDW_CODEC_SPKR BIT(15) +/* + * If the CODEC has additional devices attached directly to it. 
+ * + * For the cs42l43: + * - 0 - No speaker output + * - SOC_SDW_CODEC_SPKR - CODEC internal speaker + * - SOC_SDW_SIDECAR_AMPS - 2x Sidecar amplifiers + CODEC internal speaker + * - SOC_SDW_CODEC_SPKR | SOF_SIDECAR_AMPS - Not currently supported + */ +#define SOC_SDW_SIDECAR_AMPS BIT(16) + struct asoc_sdw_codec_info; struct asoc_sdw_dai_info { @@ -109,6 +122,28 @@ int asoc_sdw_rt_amp_init(struct snd_soc_card *card, bool playback); int asoc_sdw_rt_amp_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); +/* CS42L43 support */ +int asoc_sdw_cs42l43_spk_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + +/* CS AMP support */ +int asoc_sdw_bridge_cs35l56_count_sidecar(struct snd_soc_card *card, + int *num_dais, int *num_devs); +int asoc_sdw_bridge_cs35l56_add_sidecar(struct snd_soc_card *card, + struct snd_soc_dai_link **dai_links, + struct snd_soc_codec_conf **codec_conf); +int asoc_sdw_bridge_cs35l56_spk_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + +int asoc_sdw_cs_amp_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); @@ -118,5 +153,10 @@ int asoc_sdw_rt711_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt5682_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs42l42_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs42l43_hs_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs42l43_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From 051b7cb3fde16fd8788078d697d84069b0e50e03 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:35 +0530 Subject: ASoC: intel/sdw_utils: move maxim codec helper functions Move maxim codec helper functions to common place holder so that it can be used by other platform machine driver. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-21-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index d5dd887b9d15..9e84d1ab6cd0 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -144,6 +144,12 @@ int asoc_sdw_cs_amp_init(struct snd_soc_card *card, struct asoc_sdw_codec_info *info, bool playback); +/* MAXIM codec support */ +int asoc_sdw_maxim_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); @@ -158,5 +164,6 @@ int asoc_sdw_cs42l43_hs_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_cs42l43_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_maxim_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From 8f87e292a34813e4551e1513c62b7222900481cb Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:07 +0530 Subject: ASoC: intel/sdw_utils: move dai id common macros Move dai id common macros from intel SoundWire generic driver to soc_sdw_utils.h file so that it can be used by other platform machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 9e84d1ab6cd0..13941ddd24c8 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -29,6 +29,13 @@ */ #define SOC_SDW_SIDECAR_AMPS BIT(16) +#define SOC_SDW_UNUSED_DAI_ID -1 +#define SOC_SDW_JACK_OUT_DAI_ID 0 +#define SOC_SDW_JACK_IN_DAI_ID 1 +#define SOC_SDW_AMP_OUT_DAI_ID 2 +#define SOC_SDW_AMP_IN_DAI_ID 3 +#define SOC_SDW_DMIC_DAI_ID 4 + struct asoc_sdw_codec_info; struct asoc_sdw_dai_info { -- cgit v1.2.3 From 6e7af1fdf7da7f0b79475f573d3c1f49a826bd68 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:08 +0530 Subject: ASoC: intel/sdw_utils: move soundwire dai type macros Move SoundWire dai type macros to common header file(soc_sdw_util.h). So that these macros will be used by other platform machine driver. 
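As a rough sketch of how these ids are expected to be combined with the DAI-type macros added below and the shared structures (the part id and DAI name are illustrative assumptions, not an actual table entry from the driver):

	static struct asoc_sdw_codec_info example_codec_info = {
		.part_id = 0x712,				/* assumed part id */
		.dais = {
			{
				.direction = {true, true},	/* playback, capture */
				.dai_name = "rt712-sdca-aif1",	/* assumed DAI name */
				.dai_type = SOC_SDW_DAI_TYPE_JACK,
				.dailink = {SOC_SDW_JACK_OUT_DAI_ID, SOC_SDW_JACK_IN_DAI_ID},
				.init = asoc_sdw_rt_sdca_jack_init,
				.exit = asoc_sdw_rt_sdca_jack_exit,
				.rtd_init = asoc_sdw_rt_sdca_jack_rtd_init,
			},
		},
		.dai_num = 1,
	};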
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-2-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 13941ddd24c8..7912ff7d2bb9 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -36,6 +36,10 @@ #define SOC_SDW_AMP_IN_DAI_ID 3 #define SOC_SDW_DMIC_DAI_ID 4 +#define SOC_SDW_DAI_TYPE_JACK 0 +#define SOC_SDW_DAI_TYPE_AMP 1 +#define SOC_SDW_DAI_TYPE_MIC 2 + struct asoc_sdw_codec_info; struct asoc_sdw_dai_info { -- cgit v1.2.3 From e377c94773171e8a97e716e57ec67d49b02ded0b Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:09 +0530 Subject: ASoC: intel/sdw_utils: move soundwire codec_info_list structure SoundWire 'codec_info_list' structure is not a platform specific one. Move codec_info_list structure to common file soc_sdw_utils.c. Move codec helper functions which uses codec_info_list structure to common place holder and rename the function by adding _sdw tag. This will allow to use 'codec_info_list' structure and it's helper functions in other platform machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-3-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 7912ff7d2bb9..9d99a460ba27 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -90,8 +90,12 @@ struct asoc_sdw_mc_private { bool ignore_internal_dmic; void *private; unsigned long mc_quirk; + int codec_info_list_count; }; +extern struct asoc_sdw_codec_info codec_info_list[]; +int asoc_sdw_get_codec_info_list_count(void); + int asoc_sdw_startup(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); @@ -106,6 +110,15 @@ const char *asoc_sdw_get_codec_name(struct device *dev, const struct snd_soc_acpi_link_adr *adr_link, int adr_index); +struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_part(const u64 adr); + +struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_acpi(const u8 *acpi_id); + +struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_dai(const char *dai_name, + int *dai_index); + +int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); + /* DMIC support */ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); -- cgit v1.2.3 From 778dcb08832a5e526e447971f7ca72cb6dd2307b Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:10 +0530 Subject: ASoC: intel/sdw_utils: move machine driver dai link helper functions Move machine driver dai link helper functions to common place holder, So that it can be used by other platform machine driver. Rename these functions with "asoc_sdw" tag as a prefix. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-4-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 9d99a460ba27..b3b6d6b7ce2f 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -117,6 +117,11 @@ struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_acpi(const u8 *acpi_id); struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_dai(const char *dai_name, int *dai_index); +struct snd_soc_dai_link *asoc_sdw_mc_find_codec_dai_used(struct snd_soc_card *card, + const char *dai_name); + +void asoc_sdw_mc_dailink_exit_loop(struct snd_soc_card *card); + int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); /* DMIC support */ -- cgit v1.2.3 From 5bd414c7b80e39ef11bd86db76e726962c1bfc92 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:11 +0530 Subject: ASoC: sdw_utils: refactor sof_sdw_card_late_probe function Refactor sof_sdw_card_late_probe() function and derive a generic function soc_sdw_card_late_probe() function which can be used by SoundWire generic machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-5-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index b3b6d6b7ce2f..14e21a992f56 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -122,6 +122,8 @@ struct snd_soc_dai_link *asoc_sdw_mc_find_codec_dai_used(struct snd_soc_card *ca void asoc_sdw_mc_dailink_exit_loop(struct snd_soc_card *card); +int asoc_sdw_card_late_probe(struct snd_soc_card *card); + int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); /* DMIC support */ -- cgit v1.2.3 From 59f8b622d52e30c5be7f14212c26ca37e810ece9 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:12 +0530 Subject: ASoC: intel/sdw_utils: refactor init_dai_link() and init_simple_dai_link() To make it generic, refactor existing implementation for init_dai_link() and init_simple_dai_link() as mentioned below. - Move init_dai_link() and init_simple_dai_link() to common place holder - Rename the functions with "asoc_sdw" as prefix. - Pass the platform specific 'platform_component' structure and its size as arguments for init_simple_dai_link() function and allocate one more extra dlc for platform component. - Pass the 'platform_component' and 'num_platforms' as arguments for init_dai_link(). 
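To make the new calling convention concrete, a caller in a platform machine driver might now look roughly like the sketch below; the DMIC names and the platform component string are placeholders, not values from this patch, and the full prototypes follow in the hunk below:

	ret = asoc_sdw_init_simple_dai_link(dev, dai_links, &be_id, "dmic01",
					    0, 1,		/* capture only */
					    "DMIC01 Pin",	/* assumed CPU DAI name */
					    platform_name,	/* platform component name */
					    1,			/* num_platforms */
					    "dmic-codec", "dmic-hifi",
					    asoc_sdw_dmic_init, NULL);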
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-6-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 14e21a992f56..e366b7968c2d 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -124,6 +124,22 @@ void asoc_sdw_mc_dailink_exit_loop(struct snd_soc_card *card); int asoc_sdw_card_late_probe(struct snd_soc_card *card); +void asoc_sdw_init_dai_link(struct device *dev, struct snd_soc_dai_link *dai_links, + int *be_id, char *name, int playback, int capture, + struct snd_soc_dai_link_component *cpus, int cpus_num, + struct snd_soc_dai_link_component *platform_component, + int num_platforms, struct snd_soc_dai_link_component *codecs, + int codecs_num, int (*init)(struct snd_soc_pcm_runtime *rtd), + const struct snd_soc_ops *ops); + +int asoc_sdw_init_simple_dai_link(struct device *dev, struct snd_soc_dai_link *dai_links, + int *be_id, char *name, int playback, int capture, + const char *cpu_dai_name, const char *platform_comp_name, + int num_platforms, const char *codec_name, + const char *codec_dai_name, + int (*init)(struct snd_soc_pcm_runtime *rtd), + const struct snd_soc_ops *ops); + int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); /* DMIC support */ -- cgit v1.2.3 From 0b8f009ae92fa94ab3c0ca881fce02c336056a73 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:13 +0530 Subject: ASoC: soc-acpi: add pci revision id field in mach params structure Few IP's may have same PCI vendor id and device id and different revision id. Add revision id field to the 'snd_soc_acpi_mach_params' so that using this field platform specific implementation can be added in machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-7-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 38ccec4e3fcd..b6d301946244 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -70,6 +70,7 @@ static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) * @dai_drivers: pointer to dai_drivers, used e.g. in nocodec mode * @subsystem_vendor: optional PCI SSID vendor value * @subsystem_device: optional PCI SSID device value + * @subsystem_rev: optional PCI SSID revision value * @subsystem_id_set: true if a value has been written to * subsystem_vendor and subsystem_device. */ @@ -86,6 +87,7 @@ struct snd_soc_acpi_mach_params { struct snd_soc_dai_driver *dai_drivers; unsigned short subsystem_vendor; unsigned short subsystem_device; + unsigned short subsystem_rev; bool subsystem_id_set; }; -- cgit v1.2.3 From f0fcdd2cb0db62605d85f3b97a1b443e7c91f886 Mon Sep 17 00:00:00 2001 From: John Allen Date: Tue, 30 Jul 2024 15:17:30 +0000 Subject: ACPI: PRM: Add PRM handler direct call support Platform Runtime Mechanism (PRM) handlers can be invoked from either the AML interpreter or directly by an OS driver. Implement the latter. [ bp: Massage commit message. 
] Signed-off-by: John Allen Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Reviewed-by: Ard Biesheuvel Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20240730151731.15363-2-john.allen@amd.com --- include/linux/prmt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/prmt.h b/include/linux/prmt.h index 24da8364b919..9c094294403f 100644 --- a/include/linux/prmt.h +++ b/include/linux/prmt.h @@ -2,6 +2,11 @@ #ifdef CONFIG_ACPI_PRMT void init_prmt(void); +int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer); #else static inline void init_prmt(void) { } +static inline int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer) +{ + return -EOPNOTSUPP; +} #endif -- cgit v1.2.3 From 6b08d07cac64c0dcf1dbb4584bdf95d0f789a520 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Wed, 10 Jul 2024 12:06:51 +0200 Subject: leds: trigger: netdev: Add support for tx_err and rx_err notification with LEDs This patch provides support for enabling blinking of LEDs when RX or TX errors are detected. Approach taken in this patch is similar to one for TX or RX data transmission indication (i.e. TRIGGER_NETDEV_TX/RX attribute). One can inspect transmission errors with: ip -s link show eth0 Example LED configuration: cd /sys/devices/platform/amba_pl@0/a001a000.leds/leds/ echo netdev > mode:blue/trigger && \ echo eth0 > mode:blue/device_name && \ echo 1 > mode:blue/tx_err Signed-off-by: Lukasz Majewski Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240710100651.4059887-1-lukma@denx.de Signed-off-by: Lee Jones --- include/linux/leds.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 6885603f211b..e5968c3ed4ae 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -611,6 +611,8 @@ enum led_trigger_netdev_modes { TRIGGER_NETDEV_FULL_DUPLEX, TRIGGER_NETDEV_TX, TRIGGER_NETDEV_RX, + TRIGGER_NETDEV_TX_ERR, + TRIGGER_NETDEV_RX_ERR, /* Keep last */ __TRIGGER_NETDEV_MAX, -- cgit v1.2.3 From 480a8ad683d7a54e8d38c9c7778fb6b15aac241a Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 22 Jul 2024 15:10:58 +0300 Subject: mfd: adp5585: Add Analog Devices ADP5585 core support The ADP5585 is a 10/11 input/output port expander with a built in keypad matrix decoder, programmable logic, reset generator, and PWM generator. This driver supports the chip by modelling it as an MFD device, with two child devices for the GPIO and PWM functions. The driver is derived from an initial implementation from NXP, available in commit 8059835bee19 ("MLK-25917-1 mfd: adp5585: add ADI adp5585 core support") in their BSP kernel tree. It has been extensively rewritten. 
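As a loose illustration of the MFD split (not code from this series), a child cell such as the PWM driver could reach the shared regmap through its parent device; whether the core exports the struct adp5585_dev via parent drvdata as shown here is an assumption:

  #include <linux/mfd/adp5585.h>
  #include <linux/platform_device.h>
  #include <linux/regmap.h>

  static int adp5585_pwm_example_probe(struct platform_device *pdev)
  {
          /* Assumes the MFD core stored its struct adp5585_dev as parent drvdata. */
          struct adp5585_dev *adp5585 = dev_get_drvdata(pdev->dev.parent);

          /* Flip only the PWM enable bit; a real driver programs on/off times first. */
          return regmap_update_bits(adp5585->regmap, ADP5585_PWM_CFG,
                                    ADP5585_PWM_EN, ADP5585_PWM_EN);
  }
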
Signed-off-by: Haibo Chen Signed-off-by: Laurent Pinchart Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20240722121100.2855-3-laurent.pinchart@ideasonboard.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 126 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 include/linux/mfd/adp5585.h (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h new file mode 100644 index 000000000000..016033cd68e4 --- /dev/null +++ b/include/linux/mfd/adp5585.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Analog Devices ADP5585 I/O expander, PWM controller and keypad controller + * + * Copyright 2022 NXP + * Copyright 2024 Ideas on Board Oy + */ + +#ifndef __MFD_ADP5585_H_ +#define __MFD_ADP5585_H_ + +#include + +#define ADP5585_ID 0x00 +#define ADP5585_MAN_ID_VALUE 0x20 +#define ADP5585_MAN_ID_MASK GENMASK(7, 4) +#define ADP5585_INT_STATUS 0x01 +#define ADP5585_STATUS 0x02 +#define ADP5585_FIFO_1 0x03 +#define ADP5585_FIFO_2 0x04 +#define ADP5585_FIFO_3 0x05 +#define ADP5585_FIFO_4 0x06 +#define ADP5585_FIFO_5 0x07 +#define ADP5585_FIFO_6 0x08 +#define ADP5585_FIFO_7 0x09 +#define ADP5585_FIFO_8 0x0a +#define ADP5585_FIFO_9 0x0b +#define ADP5585_FIFO_10 0x0c +#define ADP5585_FIFO_11 0x0d +#define ADP5585_FIFO_12 0x0e +#define ADP5585_FIFO_13 0x0f +#define ADP5585_FIFO_14 0x10 +#define ADP5585_FIFO_15 0x11 +#define ADP5585_FIFO_16 0x12 +#define ADP5585_GPI_INT_STAT_A 0x13 +#define ADP5585_GPI_INT_STAT_B 0x14 +#define ADP5585_GPI_STATUS_A 0x15 +#define ADP5585_GPI_STATUS_B 0x16 +#define ADP5585_RPULL_CONFIG_A 0x17 +#define ADP5585_RPULL_CONFIG_B 0x18 +#define ADP5585_RPULL_CONFIG_C 0x19 +#define ADP5585_RPULL_CONFIG_D 0x1a +#define ADP5585_Rx_PULL_CFG_PU_300K 0 +#define ADP5585_Rx_PULL_CFG_PD_300K 1 +#define ADP5585_Rx_PULL_CFG_PU_100K 2 +#define ADP5585_Rx_PULL_CFG_DISABLE 3 +#define ADP5585_Rx_PULL_CFG_MASK 3 +#define ADP5585_GPI_INT_LEVEL_A 0x1b +#define ADP5585_GPI_INT_LEVEL_B 0x1c +#define ADP5585_GPI_EVENT_EN_A 0x1d +#define ADP5585_GPI_EVENT_EN_B 0x1e +#define ADP5585_GPI_INTERRUPT_EN_A 0x1f +#define ADP5585_GPI_INTERRUPT_EN_B 0x20 +#define ADP5585_DEBOUNCE_DIS_A 0x21 +#define ADP5585_DEBOUNCE_DIS_B 0x22 +#define ADP5585_GPO_DATA_OUT_A 0x23 +#define ADP5585_GPO_DATA_OUT_B 0x24 +#define ADP5585_GPO_OUT_MODE_A 0x25 +#define ADP5585_GPO_OUT_MODE_B 0x26 +#define ADP5585_GPIO_DIRECTION_A 0x27 +#define ADP5585_GPIO_DIRECTION_B 0x28 +#define ADP5585_RESET1_EVENT_A 0x29 +#define ADP5585_RESET1_EVENT_B 0x2a +#define ADP5585_RESET1_EVENT_C 0x2b +#define ADP5585_RESET2_EVENT_A 0x2c +#define ADP5585_RESET2_EVENT_B 0x2d +#define ADP5585_RESET_CFG 0x2e +#define ADP5585_PWM_OFFT_LOW 0x2f +#define ADP5585_PWM_OFFT_HIGH 0x30 +#define ADP5585_PWM_ONT_LOW 0x31 +#define ADP5585_PWM_ONT_HIGH 0x32 +#define ADP5585_PWM_CFG 0x33 +#define ADP5585_PWM_IN_AND BIT(2) +#define ADP5585_PWM_MODE BIT(1) +#define ADP5585_PWM_EN BIT(0) +#define ADP5585_LOGIC_CFG 0x34 +#define ADP5585_LOGIC_FF_CFG 0x35 +#define ADP5585_LOGIC_INT_EVENT_EN 0x36 +#define ADP5585_POLL_PTIME_CFG 0x37 +#define ADP5585_PIN_CONFIG_A 0x38 +#define ADP5585_PIN_CONFIG_B 0x39 +#define ADP5585_PIN_CONFIG_C 0x3a +#define ADP5585_PULL_SELECT BIT(7) +#define ADP5585_C4_EXTEND_CFG_GPIO11 (0U << 6) +#define ADP5585_C4_EXTEND_CFG_RESET2 (1U << 6) +#define ADP5585_C4_EXTEND_CFG_MASK GENMASK(6, 6) +#define ADP5585_R4_EXTEND_CFG_GPIO5 (0U << 5) +#define ADP5585_R4_EXTEND_CFG_RESET1 (1U << 5) +#define ADP5585_R4_EXTEND_CFG_MASK GENMASK(5, 5) 
+#define ADP5585_R3_EXTEND_CFG_GPIO4 (0U << 2) +#define ADP5585_R3_EXTEND_CFG_LC (1U << 2) +#define ADP5585_R3_EXTEND_CFG_PWM_OUT (2U << 2) +#define ADP5585_R3_EXTEND_CFG_MASK GENMASK(3, 2) +#define ADP5585_R0_EXTEND_CFG_GPIO1 (0U << 0) +#define ADP5585_R0_EXTEND_CFG_LY (1U << 0) +#define ADP5585_R0_EXTEND_CFG_MASK GENMASK(0, 0) +#define ADP5585_GENERAL_CFG 0x3b +#define ADP5585_OSC_EN BIT(7) +#define ADP5585_OSC_FREQ_50KHZ (0U << 5) +#define ADP5585_OSC_FREQ_100KHZ (1U << 5) +#define ADP5585_OSC_FREQ_200KHZ (2U << 5) +#define ADP5585_OSC_FREQ_500KHZ (3U << 5) +#define ADP5585_OSC_FREQ_MASK GENMASK(6, 5) +#define ADP5585_INT_CFG BIT(1) +#define ADP5585_RST_CFG BIT(0) +#define ADP5585_INT_EN 0x3c + +#define ADP5585_MAX_REG ADP5585_INT_EN + +/* + * Bank 0 covers pins "GPIO 1/R0" to "GPIO 6/R5", numbered 0 to 5 by the + * driver, and bank 1 covers pins "GPIO 7/C0" to "GPIO 11/C4", numbered 6 to + * 10. Some variants of the ADP5585 don't support "GPIO 6/R5". As the driver + * uses identical GPIO numbering for all variants to avoid confusion, GPIO 5 is + * marked as reserved in the device tree for variants that don't support it. + */ +#define ADP5585_BANK(n) ((n) >= 6 ? 1 : 0) +#define ADP5585_BIT(n) ((n) >= 6 ? BIT((n) - 6) : BIT(n)) + +struct regmap; + +struct adp5585_dev { + struct regmap *regmap; +}; + +#endif -- cgit v1.2.3 From bcbfcebda2cbc6a10a347d726e4a4f69e43a864e Mon Sep 17 00:00:00 2001 From: Mohamed Ghanmi Date: Sun, 9 Jun 2024 15:48:49 +0100 Subject: platform/x86: asus-wmi: add support for vivobook fan profiles Add support for vivobook fan profiles wmi call on the ASUS VIVOBOOK to adjust power limits. These fan profiles have a different device id than the ROG series and different order. This reorders the existing modes. As part of keeping the patch clean the throttle_thermal_policy_available boolean stored in the driver struct is removed and throttle_thermal_policy_dev is used in place (as on init it is zeroed). Co-developed-by: Luke D. Jones Signed-off-by: Luke D. Jones Signed-off-by: Mohamed Ghanmi Reviewed-by: Luke D. Jones Link: https://lore.kernel.org/r/20240609144849.2532-2-mohamed.ghanmi@supcom.tn Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/asus-wmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 0aeeae1c1943..5e6f407b2def 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -68,6 +68,7 @@ #define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032 #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018 #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075 +#define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY_VIVO 0x00110019 /* Misc */ #define ASUS_WMI_DEVID_PANEL_OD 0x00050019 -- cgit v1.2.3 From 3908ba2e0b2476e2ec13e15967bf6a37e449f2af Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Wed, 17 Jul 2024 11:17:14 +0800 Subject: RISC-V: Enable the IPI before workqueue_online_cpu() Sometimes the hotplug cpu stalls at the arch_cpu_idle() for a while after workqueue_online_cpu(). When cpu stalls at the idle loop, the reschedule IPI is pending. However the enable bit is not enabled yet so the cpu stalls at WFI until watchdog timeout. Therefore enable the IPI before the workqueue_online_cpu() to fix the issue. 
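For reference, a STARTING-section hotplug state like the one added below is normally wired up with cpuhp_setup_state(); this is a generic sketch with placeholder callbacks, not the actual RISC-V SBI IPI driver code:

  #include <linux/cpuhotplug.h>
  #include <linux/init.h>

  static int example_sbi_ipi_starting_cpu(unsigned int cpu)
  {
          /* Enable/unmask the IPI on the incoming CPU (placeholder body). */
          return 0;
  }

  static int __init example_sbi_ipi_init(void)
  {
          /*
           * States in the STARTING section run on the hotplugged CPU with
           * interrupts disabled, before ONLINE-section callbacks such as
           * workqueue_online_cpu(), so the IPI is already enabled by the
           * time the CPU can go idle in the online phase.
           */
          return cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING,
                                   "irqchip/sbi-ipi:starting",
                                   example_sbi_ipi_starting_cpu, NULL);
  }
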
Fixes: 63c5484e7495 ("workqueue: Add multiple affinity scopes and interface to select them") Signed-off-by: Nick Hu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20240717031714.1946036-1-nick.hu@sifive.com Signed-off-by: Palmer Dabbelt --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 51ba681b915a..e30d93b807d5 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -148,6 +148,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, + CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, -- cgit v1.2.3 From 53369581dc0c68a5700ed51e1660f44c4b2bb524 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:17 -0700 Subject: drm/printer: Allow NULL data in devcoredump printer We want to determine the size of the devcoredump before writing it out. To that end, we will run the devcoredump printer with NULL data to get the size, alloc data based on the generated offset, then run the devcorecump again with a valid data pointer to print. This necessitates not writing data to the data pointer on the initial pass, when it is NULL. v5: - Better commit message (Jonathan) - Add kerenl doc with examples (Jani) Cc: Maarten Lankhorst Acked-by: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-3-matthew.brost@intel.com --- include/drm/drm_print.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 5d9dff5149c9..d2676831d765 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -221,7 +221,8 @@ drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va) /** * struct drm_print_iterator - local struct used with drm_printer_coredump - * @data: Pointer to the devcoredump output buffer + * @data: Pointer to the devcoredump output buffer, can be NULL if using + * drm_printer_coredump to determine size of devcoredump * @start: The offset within the buffer to start writing * @remain: The number of bytes to write for this iteration */ @@ -266,6 +267,57 @@ struct drm_print_iterator { * coredump_read, ...) * } * + * The above example has a time complexity of O(N^2), where N is the size of the + * devcoredump. This is acceptable for small devcoredumps but scales poorly for + * larger ones. + * + * Another use case for drm_coredump_printer is to capture the devcoredump into + * a saved buffer before the dev_coredump() callback. This involves two passes: + * one to determine the size of the devcoredump and another to print it to a + * buffer. Then, in dev_coredump(), copy from the saved buffer into the + * devcoredump read buffer. + * + * For example:: + * + * char *devcoredump_saved_buffer; + * + * ssize_t __coredump_print(char *buffer, ssize_t count, ...) + * { + * struct drm_print_iterator iter; + * struct drm_printer p; + * + * iter.data = buffer; + * iter.start = 0; + * iter.remain = count; + * + * p = drm_coredump_printer(&iter); + * + * drm_printf(p, "foo=%d\n", foo); + * ... + * return count - iter.remain; + * } + * + * void coredump_print(...) 
+ * { + * ssize_t count; + * + * count = __coredump_print(NULL, INT_MAX, ...); + * devcoredump_saved_buffer = kvmalloc(count, GFP_KERNEL); + * __coredump_print(devcoredump_saved_buffer, count, ...); + * } + * + * void coredump_read(char *buffer, loff_t offset, size_t count, + * void *data, size_t datalen) + * { + * ... + * memcpy(buffer, devcoredump_saved_buffer + offset, count); + * ... + * } + * + * The above example has a time complexity of O(N*2), where N is the size of the + * devcoredump. This scales better than the previous example for larger + * devcoredumps. + * * RETURNS: * The &drm_printer object */ -- cgit v1.2.3 From b40824500eaa77668026b6d1ade6924901a680f9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 30 Jul 2024 14:38:07 +0900 Subject: ata: libata: Remove ata_noop_qc_prep() The function ata_noop_qc_prep(), as its name implies, does nothing and simply returns AC_ERR_OK. For drivers that do not need any special preparations of queued commands, we can avoid having to define struct ata_port qc_prep operation by simply testing if that operation is defined or not in ata_qc_issue(). Make this change and remove ata_noop_qc_prep(). Signed-off-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Sergey Shtylyov --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index d598ef690e50..d5446e18d9df 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1168,7 +1168,6 @@ extern int ata_xfer_mode2shift(u8 xfer_mode); extern const char *ata_mode_string(unsigned int xfer_mask); extern unsigned int ata_id_xfermask(const u16 *id); extern int ata_std_qc_defer(struct ata_queued_cmd *qc); -extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, unsigned int n_elem); extern unsigned int ata_dev_classify(const struct ata_taskfile *tf); -- cgit v1.2.3 From bf1807c6ee1f66608c7835f6f9d9139c9c477942 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 1 Aug 2024 18:04:22 +0900 Subject: ata: libata: Print device quirks only once In ata_dev_print_quirks(), return early if ata_dev_print_info() returns false or if we already printed quirk information. This is to avoid printing a device quirks multiple times (that is, each time ata_dev_revalidate() is called). To remember if ata_dev_print_quirks() was already executed, define the EH context flag ATA_EHI_DID_PRINT_QUIRKS and set this flag in ata_dev_print_quirks(). Reported-by: Geert Uytterhoeven Fixes: 58157d607aec ("ata: libata: Print quirks applied to devices") Signed-off-by: Damien Le Moal Tested-by: Geert Uytterhoeven --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index d5446e18d9df..0279c0a6302f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -378,6 +378,7 @@ enum { ATA_EHI_PRINTINFO = (1 << 18), /* print configuration info */ ATA_EHI_SETMODE = (1 << 19), /* configure transfer mode */ ATA_EHI_POST_SETMODE = (1 << 20), /* revalidating after setmode */ + ATA_EHI_DID_PRINT_QUIRKS = (1 << 21), /* already printed quirks info */ ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET, -- cgit v1.2.3 From 744500d81f81346b4d442809c0511684bb28c829 Mon Sep 17 00:00:00 2001 From: Luigi Leonardi Date: Tue, 30 Jul 2024 21:43:06 +0200 Subject: vsock: add support for SIOCOUTQ ioctl Add support for ioctl(s) in AF_VSOCK. 
The only ioctl available is SIOCOUTQ/TIOCOUTQ, which returns the number of unsent bytes in the socket. This information is transport-specific and is delegated to them using a callback. Suggested-by: Daan De Meyer Signed-off-by: Luigi Leonardi Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/net/af_vsock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 535701efc1e5..fc504d2da3d0 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -169,6 +169,9 @@ struct vsock_transport { void (*notify_buffer_size)(struct vsock_sock *, u64 *); int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val); + /* SIOCOUTQ ioctl */ + ssize_t (*unsent_bytes)(struct vsock_sock *vsk); + /* Shutdown. */ int (*shutdown)(struct vsock_sock *, int); -- cgit v1.2.3 From e6ab45005772014ce49a693a63f928203c0cbbb0 Mon Sep 17 00:00:00 2001 From: Luigi Leonardi Date: Tue, 30 Jul 2024 21:43:07 +0200 Subject: vsock/virtio: add SIOCOUTQ support for all virtio based transports Introduce support for virtio_transport_unsent_bytes ioctl for virtio_transport, vhost_vsock and vsock_loopback. For all transports the unsent bytes counter is incremented in virtio_transport_get_credit. In virtio_transport (G2H) and in vhost-vsock (H2G) the counter is decremented when the skbuff is consumed. In vsock_loopback the same skbuff is passed from the transmitter to the receiver, so the counter is decremented before queuing the skbuff to the receiver. Signed-off-by: Luigi Leonardi Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c82089dee0c8..0387d64e2c66 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -133,6 +133,7 @@ struct virtio_vsock_sock { u32 tx_cnt; u32 peer_fwd_cnt; u32 peer_buf_alloc; + size_t bytes_unsent; /* Protected by rx_lock */ u32 fwd_cnt; @@ -193,6 +194,11 @@ s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk); +ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk); + +void virtio_transport_consume_skb_sent(struct sk_buff *skb, + bool consume); + int virtio_transport_do_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk); int -- cgit v1.2.3 From 042859e80d4b67ff93f19009c02d6a8a735b0fd9 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 29 Jul 2024 21:26:43 +0100 Subject: dt-bindings: clock: renesas: Document RZ/V2H(P) SoC CPG Document the device tree bindings for the Renesas RZ/V2H(P) SoC Clock Pulse Generator (CPG). CPG block handles the below operations: - Generation and control of clock signals for the IP modules - Generation and control of resets - Control over booting - Low power consumption and power supply domains Also define constants for the core clocks of the RZ/V2H(P) SoC. Note the core clocks are a subset of the ones which are listed as part of section 4.4.2 of HW manual Rev.1.01 which cannot be controlled by CLKON register. 
Signed-off-by: Lad Prabhakar Reviewed-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20240729202645.263525-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g057-cpg.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/dt-bindings/clock/renesas,r9a09g057-cpg.h (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g057-cpg.h b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h new file mode 100644 index 000000000000..541e6d719bd6 --- /dev/null +++ b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2024 Renesas Electronics Corp. + */ +#ifndef __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ +#define __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ + +#include + +/* Core Clock list */ +#define R9A09G057_SYS_0_PCLK 0 +#define R9A09G057_CA55_0_CORE_CLK0 1 +#define R9A09G057_CA55_0_CORE_CLK1 2 +#define R9A09G057_CA55_0_CORE_CLK2 3 +#define R9A09G057_CA55_0_CORE_CLK3 4 +#define R9A09G057_CA55_0_PERIPHCLK 5 +#define R9A09G057_CM33_CLK0 6 +#define R9A09G057_CST_0_SWCLKTCK 7 +#define R9A09G057_IOTOP_0_SHCLK 8 + +#endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ */ -- cgit v1.2.3 From 79bd233010859463e46a0a4b3926eaaba25a6110 Mon Sep 17 00:00:00 2001 From: Ben Gainey Date: Tue, 30 Jul 2024 09:44:14 +0100 Subject: perf: Rename perf_event_context.nr_pending to nr_no_switch_fast. nr_pending counts the number of events in the context that either pending_sigtrap or pending_work, but it is used to prevent taking the fast path in perf_event_context_sched_out. Renamed to reflect what it is used for, rather than what it counts. This change allows using the field to track other event properties that also require skipping the fast path without possible confusion over the name. Signed-off-by: Ben Gainey Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240730084417.7693-2-ben.gainey@arm.com --- include/linux/perf_event.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6bb0c21d6335..655f66b18418 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -966,12 +966,13 @@ struct perf_event_context { struct rcu_head rcu_head; /* - * Sum (event->pending_work + event->pending_work) + * The count of events for which using the switch-out fast path + * should be avoided. * * The SIGTRAP is targeted at ctx->task, as such it won't do changing * that until the signal is delivered. */ - local_t nr_pending; + local_t nr_no_switch_fast; }; struct perf_cpu_pmu_context { -- cgit v1.2.3 From 7e8b255650fcfa1d05a57e4093d8405e6d8dd488 Mon Sep 17 00:00:00 2001 From: Ben Gainey Date: Tue, 30 Jul 2024 09:44:15 +0100 Subject: perf: Support PERF_SAMPLE_READ with inherit This change allows events to use PERF_SAMPLE_READ with inherit so long as PERF_SAMPLE_TID is also set. This enables sample based profiling of a group of counters over a hierarchy of processes or threads. This is useful, for example, for collecting per-thread counters/metrics, event based sampling of multiple counters as a unit, access to the enabled and running time when using multiplexing and so on. Prior to this, users were restricted to either collecting aggregate statistics for a multi-threaded/-process application (e.g. 
with "perf stat"), or to sample individual threads, or to profile the entire system (which requires root or CAP_PERFMON, and may produce much more data than is required). Theoretically a tool could poll for or otherwise monitor thread/process creation and construct whatever events the user is interested in using perf_event_open, for each new thread or process, but this is racy, can lead to file-descriptor exhaustion, and ultimately just replicates the behaviour of inherit, but in userspace. This configuration differs from inherit without PERF_SAMPLE_READ in that the accumulated event count, and consequently any sample (such as if triggered by overflow of sample_period) will be on a per-thread rather than on an aggregate basis. The meaning of read_format::value field of both PERF_RECORD_READ and PERF_RECORD_SAMPLE is changed such that if the sampled event uses this new configuration then the values reported will be per-thread rather than the global aggregate value. This is a change from the existing semantics of read_format (where PERF_SAMPLE_READ is used without inherit), but it is necessary to expose the per-thread counter values, and it avoids reinventing a separate "read_format_thread" field that otherwise replicates the same behaviour. This change should not break existing tools, since this configuration was not previously valid and was rejected by the kernel. Tools that opt into this new mode will need to account for this when calculating the counter delta for a given sample. Tools that wish to have both the per-thread and aggregate value can perform the global aggregation themselves from the per-thread values. The change to read_format::value does not affect existing valid perf_event_attr configurations, nor does it change the behaviour of calls to "read" on an event descriptor. Both continue to report the aggregate value for the entire thread/process hierarchy. The difference between the results reported by "read" and PERF_RECORD_SAMPLE in this new configuration is justified on the basis that it is not (easily) possible for "read" to target a specific thread (the caller only has the fd for the original parent event). Signed-off-by: Ben Gainey Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240730084417.7693-3-ben.gainey@arm.com --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 655f66b18418..701549967c18 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -969,6 +969,9 @@ struct perf_event_context { * The count of events for which using the switch-out fast path * should be avoided. * + * Sum (event->pending_work + events with + * (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))) + * * The SIGTRAP is targeted at ctx->task, as such it won't do changing * that until the signal is delivered. */ -- cgit v1.2.3 From cfa7f3d2c526c224a6271cc78a4a27a0de06f4f0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 29 Jul 2024 10:52:23 -0700 Subject: perf,x86: avoid missing caller address in stack traces captured in uprobe When tracing user functions with uprobe functionality, it's common to install the probe (e.g., a BPF program) at the first instruction of the function. This is often going to be `push %rbp` instruction in function preamble, which means that within that function frame pointer hasn't been established yet. 
This leads to consistently missing an actual caller of the traced function, because perf_callchain_user() only records current IP (capturing traced function) and then following frame pointer chain (which would be caller's frame, containing the address of caller's caller). So when we have target_1 -> target_2 -> target_3 call chain and we are tracing an entry to target_3, captured stack trace will report target_1 -> target_3 call chain, which is wrong and confusing. This patch proposes a x86-64-specific heuristic to detect `push %rbp` (`push %ebp` on 32-bit architecture) instruction being traced. Given entire kernel implementation of user space stack trace capturing works under assumption that user space code was compiled with frame pointer register (%rbp/%ebp) preservation, it seems pretty reasonable to use this instruction as a strong indicator that this is the entry to the function. In that case, return address is still pointed to by %rsp/%esp, so we fetch it and add to stack trace before proceeding to unwind the rest using frame pointer-based logic. We also check for `endbr64` (for 64-bit modes) as another common pattern for function entry, as suggested by Josh Poimboeuf. Even if we get this wrong sometimes for uprobes attached not at the function entry, it's OK because stack trace will still be overall meaningful, just with one extra bogus entry. If we don't detect this, we end up with guaranteed to be missing caller function entry in the stack trace, which is worse overall. Signed-off-by: Andrii Nakryiko Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240729175223.23914-1-andrii@kernel.org --- include/linux/uprobes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b503fafb7fb3..a270a5892ab4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -76,6 +76,8 @@ struct uprobe_task { struct uprobe *active_uprobe; unsigned long xol_vaddr; + struct arch_uprobe *auprobe; + struct return_instance *return_instances; unsigned int depth; }; -- cgit v1.2.3 From e04332ebc8ac128fa551e83f1161ab1c094d13a9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 1 Aug 2024 15:27:28 +0200 Subject: uprobes: kill uprobe_register_refctr() It doesn't make any sense to have 2 versions of _register(). Note that trace_uprobe_enable(), the only user of uprobe_register(), doesn't need to check tu->ref_ctr_offset to decide which one should be used, it could safely pass ref_ctr_offset == 0 to uprobe_register_refctr(). Add this argument to uprobe_register(), update the callers, and kill uprobe_register_refctr(). 
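To make the call-site change concrete, a hedged before/after sketch (the variables are invented; the four-argument prototype is the one added in the hunk below):

  #include <linux/uprobes.h>

  /*
   * Before: two entry points, e.g.
   *     err = uprobe_register(inode, offset, uc);
   *     err = uprobe_register_refctr(inode, offset, ref_ctr_offset, uc);
   * After: a single call; pass ref_ctr_offset == 0 when there is no
   * reference counter.
   */
  static int example_attach(struct inode *inode, loff_t offset,
                            loff_t ref_ctr_offset, struct uprobe_consumer *uc)
  {
          return uprobe_register(inode, offset, ref_ctr_offset, uc);
  }
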
Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Jiri Olsa Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240801132728.GA8800@redhat.com --- include/linux/uprobes.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index a270a5892ab4..788813c0b8fc 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -112,8 +112,7 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); -extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); -extern int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); +extern int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); @@ -154,11 +153,7 @@ static inline void uprobes_init(void) #define uprobe_get_trap_addr(regs) instruction_pointer(regs) static inline int -uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) -{ - return -ENOSYS; -} -static inline int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) +uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { return -ENOSYS; } -- cgit v1.2.3 From 3c83a9ad0295eb63bdeb81d821b8c3b9417fbcac Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 1 Aug 2024 15:27:34 +0200 Subject: uprobes: make uprobe_register() return struct uprobe * This way uprobe_unregister() and uprobe_apply() can use "struct uprobe *" rather than inode + offset. This simplifies the code and allows to avoid the unnecessary find_uprobe() + put_uprobe() in these functions. TODO: uprobe_unregister() still needs get_uprobe/put_uprobe to ensure that this uprobe can't be freed before up_write(&uprobe->register_rwsem). 
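A sketch of what a caller looks like with the handle-based API (illustrative only; the stored handle and function names are invented for the example):

  #include <linux/err.h>
  #include <linux/uprobes.h>

  static struct uprobe *example_uprobe;

  static int example_attach(struct inode *inode, loff_t offset,
                            struct uprobe_consumer *uc)
  {
          struct uprobe *u;

          /* Registration now hands back the uprobe itself. */
          u = uprobe_register(inode, offset, 0, uc);
          if (IS_ERR(u))
                  return PTR_ERR(u);

          example_uprobe = u;
          return 0;
  }

  static void example_detach(struct uprobe_consumer *uc)
  {
          /* ...and unregister/apply take that handle instead of inode + offset. */
          uprobe_unregister(example_uprobe, uc);
  }
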
Co-developed-by: Andrii Nakryiko Signed-off-by: Andrii Nakryiko Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Jiri Olsa Link: https://lore.kernel.org/r/20240801132734.GA8803@redhat.com --- include/linux/uprobes.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 788813c0b8fc..f50df1fa93e7 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -16,6 +16,7 @@ #include #include +struct uprobe; struct vm_area_struct; struct mm_struct; struct inode; @@ -112,9 +113,9 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); -extern int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); -extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); -extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); +extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); +extern void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void uprobe_start_dup_mmap(void); @@ -152,18 +153,18 @@ static inline void uprobes_init(void) #define uprobe_get_trap_addr(regs) instruction_pointer(regs) -static inline int +static inline struct uprobe * uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { - return -ENOSYS; + return ERR_PTR(-ENOSYS); } static inline int -uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) +uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add) { return -ENOSYS; } static inline void -uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) { } static inline int uprobe_mmap(struct vm_area_struct *vma) -- cgit v1.2.3 From 6edb8cd87cca5dd4389d4ea2d84b66ea94341bbd Mon Sep 17 00:00:00 2001 From: Kerem Karabay Date: Fri, 5 Jul 2024 11:17:42 +0000 Subject: HID: core: add helper for finding a field with a certain usage This helper will allow HID drivers to easily determine if they should bind to a hid_device by checking for the prescence of a certain field when its ID is not enough, which can be the case on USB devices with multiple interfaces and/or configurations. Convert google-hammer driver to use it, and remove now superfluous hammer_has_usage(). 
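A rough sketch of the intended use in a driver probe path, based on the prototype added below; the application and usage values are stand-ins rather than the google-hammer ones:

  #include <linux/hid.h>

  static int example_probe(struct hid_device *hdev, const struct hid_device_id *id)
  {
          int ret;

          ret = hid_parse(hdev);
          if (ret)
                  return ret;

          /*
           * Bind only if this interface actually exposes the field we need
           * (0x00ff0001 is a placeholder vendor-defined usage).
           */
          if (!hid_find_field(hdev, HID_INPUT_REPORT, HID_GD_KEYBOARD, 0x00ff0001))
                  return -ENODEV;

          return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
  }
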
[jkosina@suse.com: expand changelog with the information about google-hammer being added as user of this API ] Signed-off-by: Kerem Karabay Signed-off-by: Aditya Garg Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 1533c9dcd3a6..2deff79f39a1 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -940,6 +940,8 @@ extern void hidinput_report_event(struct hid_device *hid, struct hid_report *rep extern int hidinput_connect(struct hid_device *hid, unsigned int force); extern void hidinput_disconnect(struct hid_device *); +struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type, + unsigned int application, unsigned int usage); int hid_set_field(struct hid_field *, unsigned, __s32); int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt); -- cgit v1.2.3 From 79f194dd54cef3a0212914cd79dda1f91b7bf7f6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jul 2024 18:11:29 +0200 Subject: thermal: trip: Get rid of thermal_zone_get_num_trips() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only existing caller of thermal_zone_get_num_trips(), which is rcar_gen3_thermal_probe(), uses this function to put the number of trip points into a kernel log message, but this information is also available from the thermal sysfs interface. For this reason, remove the thermal_zone_get_num_trips() call from rcar_gen3_thermal_probe() and drop the former altogether. Signed-off-by: Rafael J. Wysocki Reviewed-by: Niklas Söderlund Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/2636988.Lt9SDvczpP@rjwysocki.net --- include/linux/thermal.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25fbf960b474..4fdfd5abd2c4 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -210,7 +210,6 @@ int for_each_thermal_trip(struct thermal_zone_device *tz, int thermal_zone_for_each_trip(struct thermal_zone_device *tz, int (*cb)(struct thermal_trip *, void *), void *data); -int thermal_zone_get_num_trips(struct thermal_zone_device *tz); void thermal_zone_set_trip_temp(struct thermal_zone_device *tz, struct thermal_trip *trip, int temp); -- cgit v1.2.3 From 8ecd953ca5850d2f83f548d4bbcf672d2d2cd0ca Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jul 2024 18:12:45 +0200 Subject: thermal: trip: Drop thermal_zone_get_trip() There are no more callers of thermal_zone_get_trip() in the tree, so drop it. No functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/2220301.Mh6RI2rZIc@rjwysocki.net --- include/linux/thermal.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 4fdfd5abd2c4..db44a57a7474 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -202,8 +202,6 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev, } #endif -int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, - struct thermal_trip *trip); int for_each_thermal_trip(struct thermal_zone_device *tz, int (*cb)(struct thermal_trip *, void *), void *data); -- cgit v1.2.3 From c8a132e2e032b00828d51141ab34f9aeb24f44ae Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 2 Aug 2024 11:57:32 +0100 Subject: ASoC: soc-component: Add new snd_soc_component_get_kcontrol() helpers Add new helper functions snd_soc_component_get_kcontrol() and snd_soc_component_get_kcontrol_locked() that returns a kcontrol by name, but will factor in the components name_prefix, to handle situations where multiple components are present with the same controls. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20240802105734.2309788-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index ceca69b46a82..bf2e381cd124 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -462,6 +462,11 @@ int snd_soc_component_force_enable_pin_unlocked( const char *pin); /* component controls */ +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl); +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl); int snd_soc_component_notify_control(struct snd_soc_component *component, const char * const ctl); -- cgit v1.2.3 From 6a965fbaac461564ae74dbfe6d9c9e9de65ea67a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 2 Aug 2024 14:40:07 +0200 Subject: ASoC: Intel: soc-acpi: add PTL match tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now the tables are basic for mockup devices and headset codec support Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240802124011.173820-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi-intel-match.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi-intel-match.h b/include/sound/soc-acpi-intel-match.h index 4843b57798f6..daed7123df9d 100644 --- a/include/sound/soc-acpi-intel-match.h +++ b/include/sound/soc-acpi-intel-match.h @@ -33,6 +33,7 @@ extern struct snd_soc_acpi_mach snd_soc_acpi_intel_rpl_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_lnl_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_machines[]; +extern struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_cnl_sdw_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_cfl_sdw_machines[]; @@ -44,6 +45,7 @@ extern struct snd_soc_acpi_mach snd_soc_acpi_intel_rpl_sdw_machines[]; extern struct 
snd_soc_acpi_mach snd_soc_acpi_intel_mtl_sdw_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_lnl_sdw_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[]; +extern struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[]; /* * generic table used for HDA codec-based platforms, possibly with -- cgit v1.2.3 From 92b796845a4a8789c2d9434c6a77baa88a99121e Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Fri, 2 Aug 2024 15:20:52 +0800 Subject: ASoC: tas2781: Fix a compiling warning reported by robot kernel test due to adding tas2563_dvc_table Move tas2563_dvc_table into a separate Header file, as only tas2781 codec driver use this table, and hda side codec driver won't use it. Fixes: 75ed63a5ab5d ("ASoC: tas2781: Add new Kontrol to set tas2563 digital Volume") Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240802072055.1462-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2563-tlv.h | 279 ++++++++++++++++++++++++++++++++++++++++++++ include/sound/tas2781-tlv.h | 260 ----------------------------------------- 2 files changed, 279 insertions(+), 260 deletions(-) create mode 100644 include/sound/tas2563-tlv.h (limited to 'include') diff --git a/include/sound/tas2563-tlv.h b/include/sound/tas2563-tlv.h new file mode 100644 index 000000000000..faa3e194f73b --- /dev/null +++ b/include/sound/tas2563-tlv.h @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// +// ALSA SoC Texas Instruments TAS2563 Audio Smart Amplifier +// +// Copyright (C) 2022 - 2024 Texas Instruments Incorporated +// https://www.ti.com +// +// The TAS2563 driver implements a flexible and configurable +// algo coefficient setting for one, two, or even multiple +// TAS2563 chips. +// +// Author: Shenghao Ding +// + +#ifndef __TAS2563_TLV_H__ +#define __TAS2563_TLV_H__ + +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); + +/* pow(10, db/20) * pow(2,30) */ +static const unsigned char tas2563_dvc_table[][4] = { + { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ + { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ + { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ + { 0X00, 0X00, 0X04, 0X31 }, /* -120.0db */ + { 0X00, 0X00, 0X04, 0X71 }, /* -119.5db */ + { 0X00, 0X00, 0X04, 0XB4 }, /* -119.0db */ + { 0X00, 0X00, 0X04, 0XFC }, /* -118.5db */ + { 0X00, 0X00, 0X05, 0X47 }, /* -118.0db */ + { 0X00, 0X00, 0X05, 0X97 }, /* -117.5db */ + { 0X00, 0X00, 0X05, 0XEC }, /* -117.0db */ + { 0X00, 0X00, 0X06, 0X46 }, /* -116.5db */ + { 0X00, 0X00, 0X06, 0XA5 }, /* -116.0db */ + { 0X00, 0X00, 0X07, 0X0A }, /* -115.5db */ + { 0X00, 0X00, 0X07, 0X75 }, /* -115.0db */ + { 0X00, 0X00, 0X07, 0XE6 }, /* -114.5db */ + { 0X00, 0X00, 0X08, 0X5E }, /* -114.0db */ + { 0X00, 0X00, 0X08, 0XDD }, /* -113.5db */ + { 0X00, 0X00, 0X09, 0X63 }, /* -113.0db */ + { 0X00, 0X00, 0X09, 0XF2 }, /* -112.5db */ + { 0X00, 0X00, 0X0A, 0X89 }, /* -112.0db */ + { 0X00, 0X00, 0X0B, 0X28 }, /* -111.5db */ + { 0X00, 0X00, 0X0B, 0XD2 }, /* -111.0db */ + { 0X00, 0X00, 0X0C, 0X85 }, /* -110.5db */ + { 0X00, 0X00, 0X0D, 0X43 }, /* -110.0db */ + { 0X00, 0X00, 0X0E, 0X0C }, /* -109.5db */ + { 0X00, 0X00, 0X0E, 0XE1 }, /* -109.0db */ + { 0X00, 0X00, 0X0F, 0XC3 }, /* -108.5db */ + { 0X00, 0X00, 0X10, 0XB2 }, /* -108.0db */ + { 0X00, 0X00, 0X11, 0XAF }, /* -107.5db */ + { 0X00, 0X00, 0X12, 0XBC }, /* -107.0db */ + { 0X00, 0X00, 0X13, 0XD8 }, /* -106.5db */ + { 0X00, 0X00, 0X15, 0X05 }, /* -106.0db */ + { 0X00, 0X00, 0X16, 0X44 }, /* -105.5db */ + { 0X00, 0X00, 0X17, 0X96 }, /* -105.0db */ + { 
0X00, 0X00, 0X18, 0XFB }, /* -104.5db */ + { 0X00, 0X00, 0X1A, 0X76 }, /* -104.0db */ + { 0X00, 0X00, 0X1C, 0X08 }, /* -103.5db */ + { 0X00, 0X00, 0X1D, 0XB1 }, /* -103.0db */ + { 0X00, 0X00, 0X1F, 0X73 }, /* -102.5db */ + { 0X00, 0X00, 0X21, 0X51 }, /* -102.0db */ + { 0X00, 0X00, 0X23, 0X4A }, /* -101.5db */ + { 0X00, 0X00, 0X25, 0X61 }, /* -101.0db */ + { 0X00, 0X00, 0X27, 0X98 }, /* -100.5db */ + { 0X00, 0X00, 0X29, 0XF1 }, /* -100.0db */ + { 0X00, 0X00, 0X2C, 0X6D }, /* -99.5db */ + { 0X00, 0X00, 0X2F, 0X0F }, /* -99.0db */ + { 0X00, 0X00, 0X31, 0XD9 }, /* -98.5db */ + { 0X00, 0X00, 0X34, 0XCD }, /* -98.0db */ + { 0X00, 0X00, 0X37, 0XEE }, /* -97.5db */ + { 0X00, 0X00, 0X3B, 0X3F }, /* -97.0db */ + { 0X00, 0X00, 0X3E, 0XC1 }, /* -96.5db */ + { 0X00, 0X00, 0X42, 0X79 }, /* -96.0db */ + { 0X00, 0X00, 0X46, 0X6A }, /* -95.5db */ + { 0X00, 0X00, 0X4A, 0X96 }, /* -95.0db */ + { 0X00, 0X00, 0X4F, 0X01 }, /* -94.5db */ + { 0X00, 0X00, 0X53, 0XAF }, /* -94.0db */ + { 0X00, 0X00, 0X58, 0XA5 }, /* -93.5db */ + { 0X00, 0X00, 0X5D, 0XE6 }, /* -93.0db */ + { 0X00, 0X00, 0X63, 0X76 }, /* -92.5db */ + { 0X00, 0X00, 0X69, 0X5B }, /* -92.0db */ + { 0X00, 0X00, 0X6F, 0X99 }, /* -91.5db */ + { 0X00, 0X00, 0X76, 0X36 }, /* -91.0db */ + { 0X00, 0X00, 0X7D, 0X37 }, /* -90.5db */ + { 0X00, 0X00, 0X84, 0XA2 }, /* -90.0db */ + { 0X00, 0X00, 0X8C, 0X7E }, /* -89.5db */ + { 0X00, 0X00, 0X94, 0XD1 }, /* -89.0db */ + { 0X00, 0X00, 0X9D, 0XA3 }, /* -88.5db */ + { 0X00, 0X00, 0XA6, 0XFA }, /* -88.0db */ + { 0X00, 0X00, 0XB0, 0XDF }, /* -87.5db */ + { 0X00, 0X00, 0XBB, 0X5A }, /* -87.0db */ + { 0X00, 0X00, 0XC6, 0X74 }, /* -86.5db */ + { 0X00, 0X00, 0XD2, 0X36 }, /* -86.0db */ + { 0X00, 0X00, 0XDE, 0XAB }, /* -85.5db */ + { 0X00, 0X00, 0XEB, 0XDC }, /* -85.0db */ + { 0X00, 0X00, 0XF9, 0XD6 }, /* -84.5db */ + { 0X00, 0X01, 0X08, 0XA4 }, /* -84.0db */ + { 0X00, 0X01, 0X18, 0X52 }, /* -83.5db */ + { 0X00, 0X01, 0X28, 0XEF }, /* -83.0db */ + { 0X00, 0X01, 0X3A, 0X87 }, /* -82.5db */ + { 0X00, 0X01, 0X4D, 0X2A }, /* -82.0db */ + { 0X00, 0X01, 0X60, 0XE8 }, /* -81.5db */ + { 0X00, 0X01, 0X75, 0XD1 }, /* -81.0db */ + { 0X00, 0X01, 0X8B, 0XF7 }, /* -80.5db */ + { 0X00, 0X01, 0XA3, 0X6E }, /* -80.0db */ + { 0X00, 0X01, 0XBC, 0X48 }, /* -79.5db */ + { 0X00, 0X01, 0XD6, 0X9B }, /* -79.0db */ + { 0X00, 0X01, 0XF2, 0X7E }, /* -78.5db */ + { 0X00, 0X02, 0X10, 0X08 }, /* -78.0db */ + { 0X00, 0X02, 0X2F, 0X51 }, /* -77.5db */ + { 0X00, 0X02, 0X50, 0X76 }, /* -77.0db */ + { 0X00, 0X02, 0X73, 0X91 }, /* -76.5db */ + { 0X00, 0X02, 0X98, 0XC0 }, /* -76.0db */ + { 0X00, 0X02, 0XC0, 0X24 }, /* -75.5db */ + { 0X00, 0X02, 0XE9, 0XDD }, /* -75.0db */ + { 0X00, 0X03, 0X16, 0X0F }, /* -74.5db */ + { 0X00, 0X03, 0X44, 0XDF }, /* -74.0db */ + { 0X00, 0X03, 0X76, 0X76 }, /* -73.5db */ + { 0X00, 0X03, 0XAA, 0XFC }, /* -73.0db */ + { 0X00, 0X03, 0XE2, 0XA0 }, /* -72.5db */ + { 0X00, 0X04, 0X1D, 0X8F }, /* -72.0db */ + { 0X00, 0X04, 0X5B, 0XFD }, /* -71.5db */ + { 0X00, 0X04, 0X9E, 0X1D }, /* -71.0db */ + { 0X00, 0X04, 0XE4, 0X29 }, /* -70.5db */ + { 0X00, 0X05, 0X2E, 0X5A }, /* -70.0db */ + { 0X00, 0X05, 0X7C, 0XF2 }, /* -69.5db */ + { 0X00, 0X05, 0XD0, 0X31 }, /* -69.0db */ + { 0X00, 0X06, 0X28, 0X60 }, /* -68.5db */ + { 0X00, 0X06, 0X85, 0XC8 }, /* -68.0db */ + { 0X00, 0X06, 0XE8, 0XB9 }, /* -67.5db */ + { 0X00, 0X07, 0X51, 0X86 }, /* -67.0db */ + { 0X00, 0X07, 0XC0, 0X8A }, /* -66.5db */ + { 0X00, 0X08, 0X36, 0X21 }, /* -66.0db */ + { 0X00, 0X08, 0XB2, 0XB0 }, /* -65.5db */ + { 0X00, 0X09, 0X36, 0XA1 }, /* -65.0db */ + { 0X00, 0X09, 0XC2, 0X63 
}, /* -64.5db */ + { 0X00, 0X0A, 0X56, 0X6D }, /* -64.0db */ + { 0X00, 0X0A, 0XF3, 0X3C }, /* -63.5db */ + { 0X00, 0X0B, 0X99, 0X56 }, /* -63.0db */ + { 0X00, 0X0C, 0X49, 0X48 }, /* -62.5db */ + { 0X00, 0X0D, 0X03, 0XA7 }, /* -62.0db */ + { 0X00, 0X0D, 0XC9, 0X11 }, /* -61.5db */ + { 0X00, 0X0E, 0X9A, 0X2D }, /* -61.0db */ + { 0X00, 0X0F, 0X77, 0XAD }, /* -60.5db */ + { 0X00, 0X10, 0X62, 0X4D }, /* -60.0db */ + { 0X00, 0X11, 0X5A, 0XD5 }, /* -59.5db */ + { 0X00, 0X12, 0X62, 0X16 }, /* -59.0db */ + { 0X00, 0X13, 0X78, 0XF0 }, /* -58.5db */ + { 0X00, 0X14, 0XA0, 0X50 }, /* -58.0db */ + { 0X00, 0X15, 0XD9, 0X31 }, /* -57.5db */ + { 0X00, 0X17, 0X24, 0X9C }, /* -57.0db */ + { 0X00, 0X18, 0X83, 0XAA }, /* -56.5db */ + { 0X00, 0X19, 0XF7, 0X86 }, /* -56.0db */ + { 0X00, 0X1B, 0X81, 0X6A }, /* -55.5db */ + { 0X00, 0X1D, 0X22, 0XA4 }, /* -55.0db */ + { 0X00, 0X1E, 0XDC, 0X98 }, /* -54.5db */ + { 0X00, 0X20, 0XB0, 0XBC }, /* -54.0db */ + { 0X00, 0X22, 0XA0, 0X9D }, /* -53.5db */ + { 0X00, 0X24, 0XAD, 0XE0 }, /* -53.0db */ + { 0X00, 0X26, 0XDA, 0X43 }, /* -52.5db */ + { 0X00, 0X29, 0X27, 0X9D }, /* -52.0db */ + { 0X00, 0X2B, 0X97, 0XE3 }, /* -51.5db */ + { 0X00, 0X2E, 0X2D, 0X27 }, /* -51.0db */ + { 0X00, 0X30, 0XE9, 0X9A }, /* -50.5db */ + { 0X00, 0X33, 0XCF, 0X8D }, /* -50.0db */ + { 0X00, 0X36, 0XE1, 0X78 }, /* -49.5db */ + { 0X00, 0X3A, 0X21, 0XF3 }, /* -49.0db */ + { 0X00, 0X3D, 0X93, 0XC3 }, /* -48.5db */ + { 0X00, 0X41, 0X39, 0XD3 }, /* -48.0db */ + { 0X00, 0X45, 0X17, 0X3B }, /* -47.5db */ + { 0X00, 0X49, 0X2F, 0X44 }, /* -47.0db */ + { 0X00, 0X4D, 0X85, 0X66 }, /* -46.5db */ + { 0X00, 0X52, 0X1D, 0X50 }, /* -46.0db */ + { 0X00, 0X56, 0XFA, 0XE8 }, /* -45.5db */ + { 0X00, 0X5C, 0X22, 0X4E }, /* -45.0db */ + { 0X00, 0X61, 0X97, 0XE1 }, /* -44.5db */ + { 0X00, 0X67, 0X60, 0X44 }, /* -44.0db */ + { 0X00, 0X6D, 0X80, 0X60 }, /* -43.5db */ + { 0X00, 0X73, 0XFD, 0X65 }, /* -43.0db */ + { 0X00, 0X7A, 0XDC, 0XD7 }, /* -42.5db */ + { 0X00, 0X82, 0X24, 0X8A }, /* -42.0db */ + { 0X00, 0X89, 0XDA, 0XAB }, /* -41.5db */ + { 0X00, 0X92, 0X05, 0XC6 }, /* -41.0db */ + { 0X00, 0X9A, 0XAC, 0XC8 }, /* -40.5db */ + { 0X00, 0XA3, 0XD7, 0X0A }, /* -40.0db */ + { 0X00, 0XAD, 0X8C, 0X52 }, /* -39.5db */ + { 0X00, 0XB7, 0XD4, 0XDD }, /* -39.0db */ + { 0X00, 0XC2, 0XB9, 0X65 }, /* -38.5db */ + { 0X00, 0XCE, 0X43, 0X28 }, /* -38.0db */ + { 0X00, 0XDA, 0X7B, 0XF1 }, /* -37.5db */ + { 0X00, 0XE7, 0X6E, 0X1E }, /* -37.0db */ + { 0X00, 0XF5, 0X24, 0XAC }, /* -36.5db */ + { 0X01, 0X03, 0XAB, 0X3D }, /* -36.0db */ + { 0X01, 0X13, 0X0E, 0X24 }, /* -35.5db */ + { 0X01, 0X23, 0X5A, 0X71 }, /* -35.0db */ + { 0X01, 0X34, 0X9D, 0XF8 }, /* -34.5db */ + { 0X01, 0X46, 0XE7, 0X5D }, /* -34.0db */ + { 0X01, 0X5A, 0X46, 0X27 }, /* -33.5db */ + { 0X01, 0X6E, 0XCA, 0XC5 }, /* -33.0db */ + { 0X01, 0X84, 0X86, 0X9F }, /* -32.5db */ + { 0X01, 0X9B, 0X8C, 0X27 }, /* -32.0db */ + { 0X01, 0XB3, 0XEE, 0XE5 }, /* -31.5db */ + { 0X01, 0XCD, 0XC3, 0X8C }, /* -31.0db */ + { 0X01, 0XE9, 0X20, 0X05 }, /* -30.5db */ + { 0X02, 0X06, 0X1B, 0X89 }, /* -30.0db */ + { 0X02, 0X24, 0XCE, 0XB0 }, /* -29.5db */ + { 0X02, 0X45, 0X53, 0X85 }, /* -29.0db */ + { 0X02, 0X67, 0XC5, 0XA2 }, /* -28.5db */ + { 0X02, 0X8C, 0X42, 0X3F }, /* -28.0db */ + { 0X02, 0XB2, 0XE8, 0X55 }, /* -27.5db */ + { 0X02, 0XDB, 0XD8, 0XAD }, /* -27.0db */ + { 0X03, 0X07, 0X36, 0X05 }, /* -26.5db */ + { 0X03, 0X35, 0X25, 0X29 }, /* -26.0db */ + { 0X03, 0X65, 0XCD, 0X13 }, /* -25.5db */ + { 0X03, 0X99, 0X57, 0X0C }, /* -25.0db */ + { 0X03, 0XCF, 0XEE, 0XCF }, /* -24.5db */ + { 0X04, 0X09, 
0XC2, 0XB0 }, /* -24.0db */ + { 0X04, 0X47, 0X03, 0XC1 }, /* -23.5db */ + { 0X04, 0X87, 0XE5, 0XFB }, /* -23.0db */ + { 0X04, 0XCC, 0XA0, 0X6D }, /* -22.5db */ + { 0X05, 0X15, 0X6D, 0X68 }, /* -22.0db */ + { 0X05, 0X62, 0X8A, 0XB3 }, /* -21.5db */ + { 0X05, 0XB4, 0X39, 0XBC }, /* -21.0db */ + { 0X06, 0X0A, 0XBF, 0XD4 }, /* -20.5db */ + { 0X06, 0X66, 0X66, 0X66 }, /* -20.0db */ + { 0X06, 0XC7, 0X7B, 0X36 }, /* -19.5db */ + { 0X07, 0X2E, 0X50, 0XA6 }, /* -19.0db */ + { 0X07, 0X9B, 0X3D, 0XF6 }, /* -18.5db */ + { 0X08, 0X0E, 0X9F, 0X96 }, /* -18.0db */ + { 0X08, 0X88, 0XD7, 0X6D }, /* -17.5db */ + { 0X09, 0X0A, 0X4D, 0X2F }, /* -17.0db */ + { 0X09, 0X93, 0X6E, 0XB8 }, /* -16.5db */ + { 0X0A, 0X24, 0XB0, 0X62 }, /* -16.0db */ + { 0X0A, 0XBE, 0X8D, 0X70 }, /* -15.5db */ + { 0X0B, 0X61, 0X88, 0X71 }, /* -15.0db */ + { 0X0C, 0X0E, 0X2B, 0XB0 }, /* -14.5db */ + { 0X0C, 0XC5, 0X09, 0XAB }, /* -14.0db */ + { 0X0D, 0X86, 0XBD, 0X8D }, /* -13.5db */ + { 0X0E, 0X53, 0XEB, 0XB3 }, /* -13.0db */ + { 0X0F, 0X2D, 0X42, 0X38 }, /* -12.5db */ + { 0X10, 0X13, 0X79, 0X87 }, /* -12.0db */ + { 0X11, 0X07, 0X54, 0XF9 }, /* -11.5db */ + { 0X12, 0X09, 0XA3, 0X7A }, /* -11.0db */ + { 0X13, 0X1B, 0X40, 0X39 }, /* -10.5db */ + { 0X14, 0X3D, 0X13, 0X62 }, /* -10.0db */ + { 0X15, 0X70, 0X12, 0XE1 }, /* -9.5db */ + { 0X16, 0XB5, 0X43, 0X37 }, /* -9.0db */ + { 0X18, 0X0D, 0XB8, 0X54 }, /* -8.5db */ + { 0X19, 0X7A, 0X96, 0X7F }, /* -8.0db */ + { 0X1A, 0XFD, 0X13, 0X54 }, /* -7.5db */ + { 0X1C, 0X96, 0X76, 0XC6 }, /* -7.0db */ + { 0X1E, 0X48, 0X1C, 0X37 }, /* -6.5db */ + { 0X20, 0X13, 0X73, 0X9E }, /* -6.0db */ + { 0X21, 0XFA, 0X02, 0XBF }, /* -5.5db */ + { 0X23, 0XFD, 0X66, 0X78 }, /* -5.0db */ + { 0X26, 0X1F, 0X54, 0X1C }, /* -4.5db */ + { 0X28, 0X61, 0X9A, 0XE9 }, /* -4.0db */ + { 0X2A, 0XC6, 0X25, 0X91 }, /* -3.5db */ + { 0X2D, 0X4E, 0XFB, 0XD5 }, /* -3.0db */ + { 0X2F, 0XFE, 0X44, 0X48 }, /* -2.5db */ + { 0X32, 0XD6, 0X46, 0X17 }, /* -2.0db */ + { 0X35, 0XD9, 0X6B, 0X02 }, /* -1.5db */ + { 0X39, 0X0A, 0X41, 0X5F }, /* -1.0db */ + { 0X3C, 0X6B, 0X7E, 0X4F }, /* -0.5db */ + { 0X40, 0X00, 0X00, 0X00 }, /* 0.0db */ + { 0X43, 0XCA, 0XD0, 0X22 }, /* 0.5db */ + { 0X47, 0XCF, 0X26, 0X7D }, /* 1.0db */ + { 0X4C, 0X10, 0X6B, 0XA5 }, /* 1.5db */ + { 0X50, 0X92, 0X3B, 0XE3 }, /* 2.0db */ + { 0X55, 0X58, 0X6A, 0X46 }, /* 2.5db */ + { 0X5A, 0X67, 0X03, 0XDF }, /* 3.0db */ + { 0X5F, 0XC2, 0X53, 0X32 }, /* 3.5db */ + { 0X65, 0X6E, 0XE3, 0XDB }, /* 4.0db */ + { 0X6B, 0X71, 0X86, 0X68 }, /* 4.5db */ + { 0X71, 0XCF, 0X54, 0X71 }, /* 5.0db */ + { 0X78, 0X8D, 0XB4, 0XE9 }, /* 5.5db */ + { 0X7F, 0XFF, 0XFF, 0XFF }, /* 6.0db */ +}; +#endif diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index 00fd4d449ff3..d87263e43fdb 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -17,265 +17,5 @@ static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); -static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); -/* pow(10, db/20) * pow(2,30) */ -static const __maybe_unused unsigned char tas2563_dvc_table[][4] = { - { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ - { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ - { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ - { 0X00, 0X00, 0X04, 0X31 }, /* -120.0db */ - { 0X00, 0X00, 0X04, 0X71 }, /* -119.5db */ - { 0X00, 0X00, 0X04, 0XB4 }, /* -119.0db */ - { 0X00, 0X00, 0X04, 0XFC }, /* -118.5db */ - { 0X00, 0X00, 0X05, 0X47 }, /* -118.0db */ - { 0X00, 0X00, 0X05, 
0X97 }, /* -117.5db */ - { 0X00, 0X00, 0X05, 0XEC }, /* -117.0db */ - { 0X00, 0X00, 0X06, 0X46 }, /* -116.5db */ - { 0X00, 0X00, 0X06, 0XA5 }, /* -116.0db */ - { 0X00, 0X00, 0X07, 0X0A }, /* -115.5db */ - { 0X00, 0X00, 0X07, 0X75 }, /* -115.0db */ - { 0X00, 0X00, 0X07, 0XE6 }, /* -114.5db */ - { 0X00, 0X00, 0X08, 0X5E }, /* -114.0db */ - { 0X00, 0X00, 0X08, 0XDD }, /* -113.5db */ - { 0X00, 0X00, 0X09, 0X63 }, /* -113.0db */ - { 0X00, 0X00, 0X09, 0XF2 }, /* -112.5db */ - { 0X00, 0X00, 0X0A, 0X89 }, /* -112.0db */ - { 0X00, 0X00, 0X0B, 0X28 }, /* -111.5db */ - { 0X00, 0X00, 0X0B, 0XD2 }, /* -111.0db */ - { 0X00, 0X00, 0X0C, 0X85 }, /* -110.5db */ - { 0X00, 0X00, 0X0D, 0X43 }, /* -110.0db */ - { 0X00, 0X00, 0X0E, 0X0C }, /* -109.5db */ - { 0X00, 0X00, 0X0E, 0XE1 }, /* -109.0db */ - { 0X00, 0X00, 0X0F, 0XC3 }, /* -108.5db */ - { 0X00, 0X00, 0X10, 0XB2 }, /* -108.0db */ - { 0X00, 0X00, 0X11, 0XAF }, /* -107.5db */ - { 0X00, 0X00, 0X12, 0XBC }, /* -107.0db */ - { 0X00, 0X00, 0X13, 0XD8 }, /* -106.5db */ - { 0X00, 0X00, 0X15, 0X05 }, /* -106.0db */ - { 0X00, 0X00, 0X16, 0X44 }, /* -105.5db */ - { 0X00, 0X00, 0X17, 0X96 }, /* -105.0db */ - { 0X00, 0X00, 0X18, 0XFB }, /* -104.5db */ - { 0X00, 0X00, 0X1A, 0X76 }, /* -104.0db */ - { 0X00, 0X00, 0X1C, 0X08 }, /* -103.5db */ - { 0X00, 0X00, 0X1D, 0XB1 }, /* -103.0db */ - { 0X00, 0X00, 0X1F, 0X73 }, /* -102.5db */ - { 0X00, 0X00, 0X21, 0X51 }, /* -102.0db */ - { 0X00, 0X00, 0X23, 0X4A }, /* -101.5db */ - { 0X00, 0X00, 0X25, 0X61 }, /* -101.0db */ - { 0X00, 0X00, 0X27, 0X98 }, /* -100.5db */ - { 0X00, 0X00, 0X29, 0XF1 }, /* -100.0db */ - { 0X00, 0X00, 0X2C, 0X6D }, /* -99.5db */ - { 0X00, 0X00, 0X2F, 0X0F }, /* -99.0db */ - { 0X00, 0X00, 0X31, 0XD9 }, /* -98.5db */ - { 0X00, 0X00, 0X34, 0XCD }, /* -98.0db */ - { 0X00, 0X00, 0X37, 0XEE }, /* -97.5db */ - { 0X00, 0X00, 0X3B, 0X3F }, /* -97.0db */ - { 0X00, 0X00, 0X3E, 0XC1 }, /* -96.5db */ - { 0X00, 0X00, 0X42, 0X79 }, /* -96.0db */ - { 0X00, 0X00, 0X46, 0X6A }, /* -95.5db */ - { 0X00, 0X00, 0X4A, 0X96 }, /* -95.0db */ - { 0X00, 0X00, 0X4F, 0X01 }, /* -94.5db */ - { 0X00, 0X00, 0X53, 0XAF }, /* -94.0db */ - { 0X00, 0X00, 0X58, 0XA5 }, /* -93.5db */ - { 0X00, 0X00, 0X5D, 0XE6 }, /* -93.0db */ - { 0X00, 0X00, 0X63, 0X76 }, /* -92.5db */ - { 0X00, 0X00, 0X69, 0X5B }, /* -92.0db */ - { 0X00, 0X00, 0X6F, 0X99 }, /* -91.5db */ - { 0X00, 0X00, 0X76, 0X36 }, /* -91.0db */ - { 0X00, 0X00, 0X7D, 0X37 }, /* -90.5db */ - { 0X00, 0X00, 0X84, 0XA2 }, /* -90.0db */ - { 0X00, 0X00, 0X8C, 0X7E }, /* -89.5db */ - { 0X00, 0X00, 0X94, 0XD1 }, /* -89.0db */ - { 0X00, 0X00, 0X9D, 0XA3 }, /* -88.5db */ - { 0X00, 0X00, 0XA6, 0XFA }, /* -88.0db */ - { 0X00, 0X00, 0XB0, 0XDF }, /* -87.5db */ - { 0X00, 0X00, 0XBB, 0X5A }, /* -87.0db */ - { 0X00, 0X00, 0XC6, 0X74 }, /* -86.5db */ - { 0X00, 0X00, 0XD2, 0X36 }, /* -86.0db */ - { 0X00, 0X00, 0XDE, 0XAB }, /* -85.5db */ - { 0X00, 0X00, 0XEB, 0XDC }, /* -85.0db */ - { 0X00, 0X00, 0XF9, 0XD6 }, /* -84.5db */ - { 0X00, 0X01, 0X08, 0XA4 }, /* -84.0db */ - { 0X00, 0X01, 0X18, 0X52 }, /* -83.5db */ - { 0X00, 0X01, 0X28, 0XEF }, /* -83.0db */ - { 0X00, 0X01, 0X3A, 0X87 }, /* -82.5db */ - { 0X00, 0X01, 0X4D, 0X2A }, /* -82.0db */ - { 0X00, 0X01, 0X60, 0XE8 }, /* -81.5db */ - { 0X00, 0X01, 0X75, 0XD1 }, /* -81.0db */ - { 0X00, 0X01, 0X8B, 0XF7 }, /* -80.5db */ - { 0X00, 0X01, 0XA3, 0X6E }, /* -80.0db */ - { 0X00, 0X01, 0XBC, 0X48 }, /* -79.5db */ - { 0X00, 0X01, 0XD6, 0X9B }, /* -79.0db */ - { 0X00, 0X01, 0XF2, 0X7E }, /* -78.5db */ - { 0X00, 0X02, 0X10, 0X08 }, /* -78.0db */ - { 0X00, 0X02, 
0X2F, 0X51 }, /* -77.5db */ - { 0X00, 0X02, 0X50, 0X76 }, /* -77.0db */ - { 0X00, 0X02, 0X73, 0X91 }, /* -76.5db */ - { 0X00, 0X02, 0X98, 0XC0 }, /* -76.0db */ - { 0X00, 0X02, 0XC0, 0X24 }, /* -75.5db */ - { 0X00, 0X02, 0XE9, 0XDD }, /* -75.0db */ - { 0X00, 0X03, 0X16, 0X0F }, /* -74.5db */ - { 0X00, 0X03, 0X44, 0XDF }, /* -74.0db */ - { 0X00, 0X03, 0X76, 0X76 }, /* -73.5db */ - { 0X00, 0X03, 0XAA, 0XFC }, /* -73.0db */ - { 0X00, 0X03, 0XE2, 0XA0 }, /* -72.5db */ - { 0X00, 0X04, 0X1D, 0X8F }, /* -72.0db */ - { 0X00, 0X04, 0X5B, 0XFD }, /* -71.5db */ - { 0X00, 0X04, 0X9E, 0X1D }, /* -71.0db */ - { 0X00, 0X04, 0XE4, 0X29 }, /* -70.5db */ - { 0X00, 0X05, 0X2E, 0X5A }, /* -70.0db */ - { 0X00, 0X05, 0X7C, 0XF2 }, /* -69.5db */ - { 0X00, 0X05, 0XD0, 0X31 }, /* -69.0db */ - { 0X00, 0X06, 0X28, 0X60 }, /* -68.5db */ - { 0X00, 0X06, 0X85, 0XC8 }, /* -68.0db */ - { 0X00, 0X06, 0XE8, 0XB9 }, /* -67.5db */ - { 0X00, 0X07, 0X51, 0X86 }, /* -67.0db */ - { 0X00, 0X07, 0XC0, 0X8A }, /* -66.5db */ - { 0X00, 0X08, 0X36, 0X21 }, /* -66.0db */ - { 0X00, 0X08, 0XB2, 0XB0 }, /* -65.5db */ - { 0X00, 0X09, 0X36, 0XA1 }, /* -65.0db */ - { 0X00, 0X09, 0XC2, 0X63 }, /* -64.5db */ - { 0X00, 0X0A, 0X56, 0X6D }, /* -64.0db */ - { 0X00, 0X0A, 0XF3, 0X3C }, /* -63.5db */ - { 0X00, 0X0B, 0X99, 0X56 }, /* -63.0db */ - { 0X00, 0X0C, 0X49, 0X48 }, /* -62.5db */ - { 0X00, 0X0D, 0X03, 0XA7 }, /* -62.0db */ - { 0X00, 0X0D, 0XC9, 0X11 }, /* -61.5db */ - { 0X00, 0X0E, 0X9A, 0X2D }, /* -61.0db */ - { 0X00, 0X0F, 0X77, 0XAD }, /* -60.5db */ - { 0X00, 0X10, 0X62, 0X4D }, /* -60.0db */ - { 0X00, 0X11, 0X5A, 0XD5 }, /* -59.5db */ - { 0X00, 0X12, 0X62, 0X16 }, /* -59.0db */ - { 0X00, 0X13, 0X78, 0XF0 }, /* -58.5db */ - { 0X00, 0X14, 0XA0, 0X50 }, /* -58.0db */ - { 0X00, 0X15, 0XD9, 0X31 }, /* -57.5db */ - { 0X00, 0X17, 0X24, 0X9C }, /* -57.0db */ - { 0X00, 0X18, 0X83, 0XAA }, /* -56.5db */ - { 0X00, 0X19, 0XF7, 0X86 }, /* -56.0db */ - { 0X00, 0X1B, 0X81, 0X6A }, /* -55.5db */ - { 0X00, 0X1D, 0X22, 0XA4 }, /* -55.0db */ - { 0X00, 0X1E, 0XDC, 0X98 }, /* -54.5db */ - { 0X00, 0X20, 0XB0, 0XBC }, /* -54.0db */ - { 0X00, 0X22, 0XA0, 0X9D }, /* -53.5db */ - { 0X00, 0X24, 0XAD, 0XE0 }, /* -53.0db */ - { 0X00, 0X26, 0XDA, 0X43 }, /* -52.5db */ - { 0X00, 0X29, 0X27, 0X9D }, /* -52.0db */ - { 0X00, 0X2B, 0X97, 0XE3 }, /* -51.5db */ - { 0X00, 0X2E, 0X2D, 0X27 }, /* -51.0db */ - { 0X00, 0X30, 0XE9, 0X9A }, /* -50.5db */ - { 0X00, 0X33, 0XCF, 0X8D }, /* -50.0db */ - { 0X00, 0X36, 0XE1, 0X78 }, /* -49.5db */ - { 0X00, 0X3A, 0X21, 0XF3 }, /* -49.0db */ - { 0X00, 0X3D, 0X93, 0XC3 }, /* -48.5db */ - { 0X00, 0X41, 0X39, 0XD3 }, /* -48.0db */ - { 0X00, 0X45, 0X17, 0X3B }, /* -47.5db */ - { 0X00, 0X49, 0X2F, 0X44 }, /* -47.0db */ - { 0X00, 0X4D, 0X85, 0X66 }, /* -46.5db */ - { 0X00, 0X52, 0X1D, 0X50 }, /* -46.0db */ - { 0X00, 0X56, 0XFA, 0XE8 }, /* -45.5db */ - { 0X00, 0X5C, 0X22, 0X4E }, /* -45.0db */ - { 0X00, 0X61, 0X97, 0XE1 }, /* -44.5db */ - { 0X00, 0X67, 0X60, 0X44 }, /* -44.0db */ - { 0X00, 0X6D, 0X80, 0X60 }, /* -43.5db */ - { 0X00, 0X73, 0XFD, 0X65 }, /* -43.0db */ - { 0X00, 0X7A, 0XDC, 0XD7 }, /* -42.5db */ - { 0X00, 0X82, 0X24, 0X8A }, /* -42.0db */ - { 0X00, 0X89, 0XDA, 0XAB }, /* -41.5db */ - { 0X00, 0X92, 0X05, 0XC6 }, /* -41.0db */ - { 0X00, 0X9A, 0XAC, 0XC8 }, /* -40.5db */ - { 0X00, 0XA3, 0XD7, 0X0A }, /* -40.0db */ - { 0X00, 0XAD, 0X8C, 0X52 }, /* -39.5db */ - { 0X00, 0XB7, 0XD4, 0XDD }, /* -39.0db */ - { 0X00, 0XC2, 0XB9, 0X65 }, /* -38.5db */ - { 0X00, 0XCE, 0X43, 0X28 }, /* -38.0db */ - { 0X00, 0XDA, 0X7B, 0XF1 }, /* -37.5db */ - { 
0X00, 0XE7, 0X6E, 0X1E }, /* -37.0db */ - { 0X00, 0XF5, 0X24, 0XAC }, /* -36.5db */ - { 0X01, 0X03, 0XAB, 0X3D }, /* -36.0db */ - { 0X01, 0X13, 0X0E, 0X24 }, /* -35.5db */ - { 0X01, 0X23, 0X5A, 0X71 }, /* -35.0db */ - { 0X01, 0X34, 0X9D, 0XF8 }, /* -34.5db */ - { 0X01, 0X46, 0XE7, 0X5D }, /* -34.0db */ - { 0X01, 0X5A, 0X46, 0X27 }, /* -33.5db */ - { 0X01, 0X6E, 0XCA, 0XC5 }, /* -33.0db */ - { 0X01, 0X84, 0X86, 0X9F }, /* -32.5db */ - { 0X01, 0X9B, 0X8C, 0X27 }, /* -32.0db */ - { 0X01, 0XB3, 0XEE, 0XE5 }, /* -31.5db */ - { 0X01, 0XCD, 0XC3, 0X8C }, /* -31.0db */ - { 0X01, 0XE9, 0X20, 0X05 }, /* -30.5db */ - { 0X02, 0X06, 0X1B, 0X89 }, /* -30.0db */ - { 0X02, 0X24, 0XCE, 0XB0 }, /* -29.5db */ - { 0X02, 0X45, 0X53, 0X85 }, /* -29.0db */ - { 0X02, 0X67, 0XC5, 0XA2 }, /* -28.5db */ - { 0X02, 0X8C, 0X42, 0X3F }, /* -28.0db */ - { 0X02, 0XB2, 0XE8, 0X55 }, /* -27.5db */ - { 0X02, 0XDB, 0XD8, 0XAD }, /* -27.0db */ - { 0X03, 0X07, 0X36, 0X05 }, /* -26.5db */ - { 0X03, 0X35, 0X25, 0X29 }, /* -26.0db */ - { 0X03, 0X65, 0XCD, 0X13 }, /* -25.5db */ - { 0X03, 0X99, 0X57, 0X0C }, /* -25.0db */ - { 0X03, 0XCF, 0XEE, 0XCF }, /* -24.5db */ - { 0X04, 0X09, 0XC2, 0XB0 }, /* -24.0db */ - { 0X04, 0X47, 0X03, 0XC1 }, /* -23.5db */ - { 0X04, 0X87, 0XE5, 0XFB }, /* -23.0db */ - { 0X04, 0XCC, 0XA0, 0X6D }, /* -22.5db */ - { 0X05, 0X15, 0X6D, 0X68 }, /* -22.0db */ - { 0X05, 0X62, 0X8A, 0XB3 }, /* -21.5db */ - { 0X05, 0XB4, 0X39, 0XBC }, /* -21.0db */ - { 0X06, 0X0A, 0XBF, 0XD4 }, /* -20.5db */ - { 0X06, 0X66, 0X66, 0X66 }, /* -20.0db */ - { 0X06, 0XC7, 0X7B, 0X36 }, /* -19.5db */ - { 0X07, 0X2E, 0X50, 0XA6 }, /* -19.0db */ - { 0X07, 0X9B, 0X3D, 0XF6 }, /* -18.5db */ - { 0X08, 0X0E, 0X9F, 0X96 }, /* -18.0db */ - { 0X08, 0X88, 0XD7, 0X6D }, /* -17.5db */ - { 0X09, 0X0A, 0X4D, 0X2F }, /* -17.0db */ - { 0X09, 0X93, 0X6E, 0XB8 }, /* -16.5db */ - { 0X0A, 0X24, 0XB0, 0X62 }, /* -16.0db */ - { 0X0A, 0XBE, 0X8D, 0X70 }, /* -15.5db */ - { 0X0B, 0X61, 0X88, 0X71 }, /* -15.0db */ - { 0X0C, 0X0E, 0X2B, 0XB0 }, /* -14.5db */ - { 0X0C, 0XC5, 0X09, 0XAB }, /* -14.0db */ - { 0X0D, 0X86, 0XBD, 0X8D }, /* -13.5db */ - { 0X0E, 0X53, 0XEB, 0XB3 }, /* -13.0db */ - { 0X0F, 0X2D, 0X42, 0X38 }, /* -12.5db */ - { 0X10, 0X13, 0X79, 0X87 }, /* -12.0db */ - { 0X11, 0X07, 0X54, 0XF9 }, /* -11.5db */ - { 0X12, 0X09, 0XA3, 0X7A }, /* -11.0db */ - { 0X13, 0X1B, 0X40, 0X39 }, /* -10.5db */ - { 0X14, 0X3D, 0X13, 0X62 }, /* -10.0db */ - { 0X15, 0X70, 0X12, 0XE1 }, /* -9.5db */ - { 0X16, 0XB5, 0X43, 0X37 }, /* -9.0db */ - { 0X18, 0X0D, 0XB8, 0X54 }, /* -8.5db */ - { 0X19, 0X7A, 0X96, 0X7F }, /* -8.0db */ - { 0X1A, 0XFD, 0X13, 0X54 }, /* -7.5db */ - { 0X1C, 0X96, 0X76, 0XC6 }, /* -7.0db */ - { 0X1E, 0X48, 0X1C, 0X37 }, /* -6.5db */ - { 0X20, 0X13, 0X73, 0X9E }, /* -6.0db */ - { 0X21, 0XFA, 0X02, 0XBF }, /* -5.5db */ - { 0X23, 0XFD, 0X66, 0X78 }, /* -5.0db */ - { 0X26, 0X1F, 0X54, 0X1C }, /* -4.5db */ - { 0X28, 0X61, 0X9A, 0XE9 }, /* -4.0db */ - { 0X2A, 0XC6, 0X25, 0X91 }, /* -3.5db */ - { 0X2D, 0X4E, 0XFB, 0XD5 }, /* -3.0db */ - { 0X2F, 0XFE, 0X44, 0X48 }, /* -2.5db */ - { 0X32, 0XD6, 0X46, 0X17 }, /* -2.0db */ - { 0X35, 0XD9, 0X6B, 0X02 }, /* -1.5db */ - { 0X39, 0X0A, 0X41, 0X5F }, /* -1.0db */ - { 0X3C, 0X6B, 0X7E, 0X4F }, /* -0.5db */ - { 0X40, 0X00, 0X00, 0X00 }, /* 0.0db */ - { 0X43, 0XCA, 0XD0, 0X22 }, /* 0.5db */ - { 0X47, 0XCF, 0X26, 0X7D }, /* 1.0db */ - { 0X4C, 0X10, 0X6B, 0XA5 }, /* 1.5db */ - { 0X50, 0X92, 0X3B, 0XE3 }, /* 2.0db */ - { 0X55, 0X58, 0X6A, 0X46 }, /* 2.5db */ - { 0X5A, 0X67, 0X03, 0XDF }, /* 3.0db */ - { 0X5F, 0XC2, 0X53, 0X32 
}, /* 3.5db */ - { 0X65, 0X6E, 0XE3, 0XDB }, /* 4.0db */ - { 0X6B, 0X71, 0X86, 0X68 }, /* 4.5db */ - { 0X71, 0XCF, 0X54, 0X71 }, /* 5.0db */ - { 0X78, 0X8D, 0XB4, 0XE9 }, /* 5.5db */ - { 0XFF, 0XFF, 0XFF, 0XFF }, /* 6.0db */ -}; #endif -- cgit v1.2.3 From 54233a4254036efca91b9bffbd398ecf39e90555 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 Jul 2024 17:30:40 +0200 Subject: uretprobe: change syscall number, again Despite multiple attempts to get the syscall number assignment right for the newly added uretprobe syscall, we ended up with a bit of a mess: - The number is defined as 467 based on the assumption that the xattrat family of syscalls would use 463 through 466, but those did not make it into 6.11. - The include/uapi/asm-generic/unistd.h file still lists the number 463, but the new scripts/syscall.tbl that was supposed to have the same data lists 467 instead as the number for arc, arm64, csky, hexagon, loongarch, nios2, openrisc and riscv. None of these architectures actually provide a uretprobe syscall. - All the other architectures (powerpc, arm, mips, ...) don't list this syscall at all. There are two ways to make it consistent again: either list it with the same syscall number on all architectures, or only list it on x86 but not in scripts/syscall.tbl and asm-generic/unistd.h. Based on the most recent discussion, it seems like we won't need it anywhere else, so just remove the inconsistent assignment and instead move the x86 number to the next available one in the architecture specific range, which is 335. Fixes: 5c28424e9a34 ("syscalls: Fix to add sys_uretprobe to syscall.tbl") Fixes: 190fec72df4a ("uprobe: Wire up uretprobe system call") Fixes: 63ded110979b ("uprobe: Change uretprobe syscall scope and number") Acked-by: Masami Hiramatsu (Google) Reviewed-by: Jiri Olsa Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/unistd.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 985a262d0f9e..5bf6148cac2b 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -841,11 +841,8 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) -#define __NR_uretprobe 463 -__SYSCALL(__NR_uretprobe, sys_uretprobe) - #undef __NR_syscalls -#define __NR_syscalls 464 +#define __NR_syscalls 463 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From 37c6dccd683797a5a4ec85d2e94939bf829d3499 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Sun, 28 Jul 2024 20:26:59 +0100 Subject: cpufreq: Remove LATENCY_MULTIPLIER The current LATENCY_MULTIPLIER which has been around for nearly 20 years causes rate_limit_us to be always in ms range. On M1 mac mini I get 50 and 56us transition latency, but due to the 1000 multiplier we end up setting rate_limit_us to 50 and 56ms, which gets capped into 2ms and was 10ms before e13aa799c2a6 ("cpufreq: Change default transition delay to 2ms") On Intel I5 system transition latency is 20us but due to the multiplier we end up with 20ms that again is capped to 2ms. Given how good modern hardware and how modern workloads require systems to be more responsive to cater for sudden changes in workload (tasks sleeping/wakeup/migrating, uclamp causing a sudden boost or cap) and that 2ms is quarter of the time of 120Hz refresh rate system, drop the old logic in favour of providing 50% headroom. rate_limit_us = 1.5 * latency. 
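As a rough sketch of the new behaviour (the function name below is illustrative, not the actual cpufreq code):

	static unsigned int example_transition_delay_us(unsigned int latency_us)
	{
		if (!latency_us)
			return 1000;	/* keep the 1 ms default for 0-latency hardware */
		return latency_us + latency_us / 2;	/* 50% headroom */
	}

With this, the 50 us latency reported on the M1 mac mini maps to a 75 us rate limit instead of the 50 ms computed with the old multiplier (before capping).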
I considered not adding any headroom which could mean that we can end up with infinite back-to-back requests. I also considered providing a constant headroom (e.g: 100us) assuming that any h/w or f/w dealing with the request shouldn't require a large headroom when transition_latency is actually high. But for both cases I wasn't sure if h/w or f/w can end up being overwhelmed dealing with the freq requests in a potentially busy system. So I opted for providing 50% breathing room. This is expected to impact schedutil only as the other user, dbs_governor, takes the max(2*tick, transition_delay_us) and the former was at least 2ms on 1ms TICK, which is equivalent to the max_delay_us before applying this patch. For systems with TICK of 4ms, this value would have almost always ended up with 8ms sampling rate. For systems that report 0 transition latency, we still default to returning 1ms as transition delay. This helps in eliminating a source of latency for applying requests as mentioned in [1]. For example if we have a 1ms tick, most systems will miss sending an update at tick when updating the util_avg for a task/CPU (rate_limit_us will be 2ms for most systems). Link: https://lore.kernel.org/lkml/20240724212255.mfr2ybiv2j2uqek7@airbuntu/ # [1] Link: https://lore.kernel.org/lkml/20240205022500.2232124-1-qyousef@layalina.io/ Signed-off-by: Qais Yousef Link: https://patch.msgid.link/20240728192659.58115-1-qyousef@layalina.io [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d4d2f4d1d7cb..e0e19d9c1323 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -577,12 +577,6 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) -/* - * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. - */ -#define LATENCY_MULTIPLIER (1000) - struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; int (*init)(struct cpufreq_policy *policy); -- cgit v1.2.3 From 52831d9bbc9aa029aed525dfbe61cb78c1db991c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 9 Jul 2024 22:37:25 +0200 Subject: ACPI: sysfs: evaluate _STR on each sysfs access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The handling of the _STR method is inconsistent with the other method evaluations. It is the only method which is cached. The cached value stored in 'struct acpi_device_pnp' has a different lifetime than the other struct members. Commit d1efe3c324ea ("ACPI: Add new sysfs interface to export device description") does not explain this difference. Evaluating the method every time also removes the necessity to manage the lifetime of the cached value, which would be a problem when managing the sysfs attributes through the device core. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240709-acpi-sysfs-groups-v2-2-058ab0667fa8@weissschuh.net Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpi_bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 8db5bd382915..d0ec808e2c42 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -255,7 +255,6 @@ struct acpi_device_pnp { struct list_head ids; /* _HID and _CIDs */ acpi_device_name device_name; /* Driver-determined */ acpi_device_class device_class; /* " */ - union acpi_object *str_obj; /* unicode string for _STR method */ }; #define acpi_device_bid(d) ((d)->pnp.bus_id) -- cgit v1.2.3 From b6b242d019ed23195c81cf00eb8290d386efb83f Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Fri, 2 Aug 2024 10:59:45 -0400 Subject: Revert "drm: Introduce 'power saving policy' drm property" This reverts commit 76299a557f36d624ca32500173ad7856e1ad93c0. It was merged without meeting userspace requirements. Signed-off-by: Hamza Mahfooz Reviewed-by: Harry Wentland Link: https://patchwork.freedesktop.org/patch/msgid/20240802145946.48073-1-hamza.mahfooz@amd.com --- include/drm/drm_connector.h | 2 -- include/drm/drm_mode_config.h | 5 ----- include/uapi/drm/drm_mode.h | 7 ------- 3 files changed, 14 deletions(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 5ad735253413..e3fa43291f44 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -2267,8 +2267,6 @@ int drm_mode_create_dp_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces); int drm_mode_create_content_type_property(struct drm_device *dev); int drm_mode_create_suggested_offset_properties(struct drm_device *dev); -int drm_mode_create_power_saving_policy_property(struct drm_device *dev, - uint64_t supported_policies); int drm_connector_set_path_property(struct drm_connector *connector, const char *path); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 150f9a3b649f..ab0f167474b1 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -969,11 +969,6 @@ struct drm_mode_config { */ struct drm_atomic_state *suspend_state; - /** - * @power_saving_policy: bitmask for power saving policy requests. - */ - struct drm_property *power_saving_policy; - const struct drm_mode_config_helper_funcs *helper_private; }; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 880303c2ad97..d390011b89b4 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -152,13 +152,6 @@ extern "C" { #define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ #define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ -/* power saving policy options */ -#define DRM_MODE_REQUIRE_COLOR_ACCURACY BIT(0) /* Compositor requires color accuracy */ -#define DRM_MODE_REQUIRE_LOW_LATENCY BIT(1) /* Compositor requires low latency */ - -#define DRM_MODE_POWER_SAVING_POLICY_ALL (DRM_MODE_REQUIRE_COLOR_ACCURACY |\ - DRM_MODE_REQUIRE_LOW_LATENCY) - /* Dithering mode options */ #define DRM_MODE_DITHERING_OFF 0 #define DRM_MODE_DITHERING_ON 1 -- cgit v1.2.3 From 70e6b7d9ae3c63df90a7bba7700e8d5c300c3c60 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 2 Aug 2024 14:55:54 +0100 Subject: x86/i8253: Disable PIT timer 0 when not in use Leaving the PIT interrupt running can cause noticeable steal time for virtual guests. The VMM generally has a timer which toggles the IRQ input to the PIC and I/O APIC, which takes CPU time away from the guest. 
Even on real hardware, running the counter may use power needlessly (albeit not much). Make sure it's turned off if it isn't going to be used. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Link: https://lore.kernel.org/all/20240802135555.564941-1-dwmw2@infradead.org --- include/linux/i8253.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/i8253.h b/include/linux/i8253.h index 8336b2f6f834..bf169cfef7f1 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -24,6 +24,7 @@ extern raw_spinlock_t i8253_lock; extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); +extern void clockevent_i8253_disable(void); extern void setup_pit_timer(void); -- cgit v1.2.3 From 531b2ca0a940ac9db03f246c8b77c4201de72b00 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 2 Aug 2024 14:55:55 +0100 Subject: clockevents/drivers/i8253: Fix stop sequence for timer 0 According to the data sheet, writing the MODE register should stop the counter (and thus the interrupts). This appears to work on real hardware, at least modern Intel and AMD systems. It should also work on Hyper-V. However, on some buggy virtual machines the mode change doesn't have any effect until the counter is subsequently loaded (or perhaps when the IRQ next fires). So, set MODE 0 and then load the counter, to ensure that those buggy VMs do the right thing and the interrupts stop. And then write MODE 0 *again* to stop the counter on compliant implementations too. Apparently, Hyper-V keeps firing the IRQ *repeatedly* even in mode zero when it should only happen once, but the second MODE write stops that too. Userspace test program (mostly written by tglx): ===== #include <stdint.h> #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <sys/io.h> #define BUILDIO(bwl, bw, type) \ static __always_inline void __out##bwl(type value, uint16_t port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ static __always_inline type __in##bwl(uint16_t port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ : "=a"(value) : "Nd"(port)); \ return value; \ } BUILDIO(b, b, uint8_t) #define inb __inb #define outb __outb #define PIT_MODE 0x43 #define PIT_CH0 0x40 #define PIT_CH2 0x42 static int is8254; static void dump_pit(void) { if (is8254) { // Latch and output counter and status outb(0xC2, PIT_MODE); printf("%02x %02x %02x\n", inb(PIT_CH0), inb(PIT_CH0), inb(PIT_CH0)); } else { // Latch and output counter outb(0x0, PIT_MODE); printf("%02x %02x\n", inb(PIT_CH0), inb(PIT_CH0)); } } int main(int argc, char* argv[]) { int nr_counts = 2; if (argc > 1) nr_counts = atoi(argv[1]); if (argc > 2) is8254 = 1; if (ioperm(0x40, 4, 1) != 0) return 1; dump_pit(); printf("Set oneshot\n"); outb(0x38, PIT_MODE); outb(0x00, PIT_CH0); outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); printf("Set periodic\n"); outb(0x34, PIT_MODE); outb(0x00, PIT_CH0); outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set stop (%d counter writes)\n", nr_counts); outb(0x30, PIT_MODE); while (nr_counts--) outb(0xFF, PIT_CH0); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set MODE 0\n"); outb(0x30, PIT_MODE); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); return 0; } ===== Suggested-by: Sean Christopherson Co-developed-by: Li RongQing Signed-off-by: Li RongQing Signed-off-by: David Woodhouse
Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Link: https://lore.kernel.org/all/20240802135555.564941-2-dwmw2@infradead.org --- include/linux/i8253.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/i8253.h b/include/linux/i8253.h index bf169cfef7f1..56c280eb2d4f 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -21,7 +21,6 @@ #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) extern raw_spinlock_t i8253_lock; -extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); extern void clockevent_i8253_disable(void); -- cgit v1.2.3 From 90ec3a8a7fd0d43026fcca979713e077d4883b56 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 2 Aug 2024 16:22:13 +0100 Subject: spi: Add empty versions of ACPI functions Provide empty versions of acpi_spi_count_resources(), acpi_spi_device_alloc() and acpi_spi_find_controller_by_adev() if the real functions are not being built. This commit fixes two problems with the original definitions: 1) There wasn't an empty version of these functions 2) The #if only depended on CONFIG_ACPI. But the functions are implemented in the core spi.c so CONFIG_SPI_MASTER must also be enabled for the real functions to exist. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240802152215.20831-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e4f3f3d30a03..d47d5f14ff99 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -902,12 +902,29 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SPI_MASTER) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); +#else +static inline struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) +{ + return NULL; +} + +static inline struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev, + int index) +{ + return ERR_PTR(-ENODEV); +} + +static inline int acpi_spi_count_resources(struct acpi_device *adev) +{ + return 0; +} #endif /* -- cgit v1.2.3 From e99129e5dbf7ca87233d31ad19348f6ce8627b38 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 1 Aug 2024 13:32:59 -1000 Subject: sched_ext: Allow p->scx.disallow only while loading From 1232da7eced620537a78f19c8cf3d4a3508e2419 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 31 Jul 2024 09:14:52 -1000 p->scx.disallow provides a way for the BPF scheduler to reject certain tasks from attaching. It's currently allowed for both the load and fork paths; however, the latter doesn't actually work as p->sched_class is already set by the time scx_ops_init_task() is called during fork. This is a convenience feature which is mostly useful from the load path anyway. Allow it only from the load path. v2: Trigger scx_ops_error() iff @p->policy == SCHED_EXT to make it a bit easier for the BPF scheduler (David). 
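As an illustration only (the callback and helper names below follow common sched_ext BPF conventions and are assumptions, not part of this patch), a loader-time rejection could look like:

	s32 BPF_STRUCT_OPS(example_init_task, struct task_struct *p,
			   struct scx_init_task_args *args)
	{
		/* honoured only while the scheduler is loading, not at fork */
		if (!args->fork && example_task_is_unsupported(p))
			p->scx.disallow = true;
		return 0;
	}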
Signed-off-by: Tejun Heo Reported-by: "Zhangqiao (2012 lab)" Link: http://lkml.kernel.org/r/20240711110720.1285-1-zhangqiao22@huawei.com Fixes: 7bb6f0810ecf ("sched_ext: Allow BPF schedulers to disallow specific tasks from joining SCHED_EXT") Acked-by: David Vernet Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 26e1c33bc844..69f68e2121a8 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -179,11 +179,12 @@ struct sched_ext_entity { * If set, reject future sched_setscheduler(2) calls updating the policy * to %SCHED_EXT with -%EACCES. * - * If set from ops.init_task() and the task's policy is already - * %SCHED_EXT, which can happen while the BPF scheduler is being loaded - * or by inhering the parent's policy during fork, the task's policy is - * rejected and forcefully reverted to %SCHED_NORMAL. The number of - * such events are reported through /sys/kernel/debug/sched_ext::nr_rejected. + * Can be set from ops.init_task() while the BPF scheduler is being + * loaded (!scx_init_task_args->fork). If set and the task's policy is + * already %SCHED_EXT, the task's policy is rejected and forcefully + * reverted to %SCHED_NORMAL. The number of such events are reported + * through /sys/kernel/debug/sched_ext::nr_rejected. Setting this flag + * during fork is not allowed. */ bool disallow; /* reject switching into SCX */ -- cgit v1.2.3 From 49675f5bdf9ae2624b430dafda4cb29024521625 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 1 Aug 2024 09:34:01 -0700 Subject: net: remove IFF_* re-definition We re-define values of enum netdev_priv_flags as preprocessor macros with the same name. I guess this was done to avoid breaking out of tree modules which may use #ifdef X for kernel compatibility? Commit 7aa98047df95 ("net: move net_device priv_flags out from UAPI") which added the enum doesn't say. In any case, the flags with defines are quite old now, and defines for new flags don't get added. OOT drivers have to resort to code greps for compat detection, anyway. Let's delete these defines, save LoC, help LXR link to the right place. 
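For illustration (hypothetical driver snippet), the enum values remain usable from C code; only preprocessor checks stop working:

	/* still fine: IFF_NO_QUEUE is an enum netdev_priv_flags value */
	if (dev->priv_flags & IFF_NO_QUEUE)
		example_setup_noqueue(dev);

/* no longer possible: enum constants are invisible to cpp */
#ifdef IFF_NO_QUEUE
	/* never compiled after this change */
#endif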
Reviewed-by: Simon Horman Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20240801163401.378723-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 607009150b5f..0ef3eaa23f4b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1689,38 +1689,6 @@ enum netdev_priv_flags { IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; -#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN -#define IFF_EBRIDGE IFF_EBRIDGE -#define IFF_BONDING IFF_BONDING -#define IFF_ISATAP IFF_ISATAP -#define IFF_WAN_HDLC IFF_WAN_HDLC -#define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE -#define IFF_DONT_BRIDGE IFF_DONT_BRIDGE -#define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL -#define IFF_MACVLAN_PORT IFF_MACVLAN_PORT -#define IFF_BRIDGE_PORT IFF_BRIDGE_PORT -#define IFF_OVS_DATAPATH IFF_OVS_DATAPATH -#define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING -#define IFF_UNICAST_FLT IFF_UNICAST_FLT -#define IFF_TEAM_PORT IFF_TEAM_PORT -#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS -#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE -#define IFF_MACVLAN IFF_MACVLAN -#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM -#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER -#define IFF_NO_QUEUE IFF_NO_QUEUE -#define IFF_OPENVSWITCH IFF_OPENVSWITCH -#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE -#define IFF_TEAM IFF_TEAM -#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED -#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM -#define IFF_MACSEC IFF_MACSEC -#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER -#define IFF_FAILOVER IFF_FAILOVER -#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE -#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER -#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR - /* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { ML_PRIV_NONE, -- cgit v1.2.3 From 7e1d512dab5042aa1c2224ed362be79e3f22a15e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 1 Aug 2024 21:00:03 +0100 Subject: linkmode: Change return type of linkmode_andnot to bool linkmode_andnot() simply returns the result of bitmap_andnot(). And the return type of bitmap_andnot() is bool. So it makes sense for the return type of linkmode_andnot() to also be bool. I checked all call-sites and they either ignore the return value or treat it as a bool. Compile tested only. 
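A typical call site already treats the result as a bool, e.g. (the mask name below is a placeholder):

	__ETHTOOL_DECLARE_LINK_MODE_MASK(unwanted);

	/* true if any advertised mode is not actually supported */
	if (linkmode_andnot(unwanted, phydev->advertising, phydev->supported))
		linkmode_and(phydev->advertising, phydev->advertising,
			     phydev->supported);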
Link: https://lore.kernel.org/netdev/68088998-4486-4930-90a4-96a32f08c490@lunn.ch/ Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240801-linkfield-bowl-v1-1-d58f68967802@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/linkmode.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index d94bfd9ac8cc..3b9de09871f6 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -37,8 +37,9 @@ static inline bool linkmode_empty(const unsigned long *src) return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static inline int linkmode_andnot(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2) +static inline bool linkmode_andnot(unsigned long *dst, + const unsigned long *src1, + const unsigned long *src2) { return bitmap_andnot(dst, src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } -- cgit v1.2.3 From f94074687d05aea10b50c4f4055a19d9d0c3bd27 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 1 Aug 2024 17:23:11 +0300 Subject: net: core: annotate socks of struct sock_reuseport with __counted_by According to '__reuseport_alloc()', annotate flexible array member 'sock' of 'struct sock_reuseport' with '__counted_by()' and use convenient 'struct_size()' to simplify the math used in 'kzalloc()'. Signed-off-by: Dmitry Antipov Reviewed-by: Gustavo A. R. Silva Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240801142311.42837-1-dmantipov@yandex.ru Signed-off-by: Jakub Kicinski --- include/net/sock_reuseport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 6ec140b0a61b..6e4faf3ee76f 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -26,7 +26,7 @@ struct sock_reuseport { unsigned int bind_inany:1; unsigned int has_conns:1; struct bpf_prog __rcu *prog; /* optional BPF sock selector */ - struct sock *socks[]; /* array of sock pointers */ + struct sock *socks[] __counted_by(max_socks); }; extern int reuseport_alloc(struct sock *sk, bool bind_inany); -- cgit v1.2.3 From 8585632a8600d26d4e6b7246d375a9c3a344f0d2 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 9 Jul 2024 13:14:28 +0200 Subject: iio: backend: remove unused parameter Indio_dev was not being used in iio_backend_extend_chan_spec() so remove it. 
Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240709-dev-iio-backend-add-debugfs-v1-1-fb4b8f2373c7@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 8099759d7242..4e81931703ab 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -142,8 +142,7 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf); -int iio_backend_extend_chan_spec(struct iio_dev *indio_dev, - struct iio_backend *back, +int iio_backend_extend_chan_spec(struct iio_backend *back, struct iio_chan_spec *chan); void *iio_backend_get_priv(const struct iio_backend *conv); struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name); -- cgit v1.2.3 From b40cafc1143600122719ae335359722538ed06e3 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 11 Jul 2024 14:15:41 -0500 Subject: dt-bindings: iio: adc: add AD4695 and similar ADCs Add device tree bindings for AD4695 and similar ADCs. Reviewed-by: Conor Dooley Signed-off-by: David Lechner Link: https://patch.msgid.link/20240711-iio-adc-ad4695-v4-0-c31621113b57@baylibre.com Signed-off-by: Jonathan Cameron --- include/dt-bindings/iio/adi,ad4695.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/dt-bindings/iio/adi,ad4695.h (limited to 'include') diff --git a/include/dt-bindings/iio/adi,ad4695.h b/include/dt-bindings/iio/adi,ad4695.h new file mode 100644 index 000000000000..9fbef542bf67 --- /dev/null +++ b/include/dt-bindings/iio/adi,ad4695.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_ADI_AD4695_H +#define _DT_BINDINGS_ADI_AD4695_H + +#define AD4695_COMMON_MODE_REFGND 0xFF +#define AD4695_COMMON_MODE_COM 0xFE + +#endif /* _DT_BINDINGS_ADI_AD4695_H */ -- cgit v1.2.3 From 2477d7b179d3295c9978420027750f047976bd04 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 26 Jul 2024 15:18:40 -0500 Subject: iio: backend: spelling: continuous -> continuous This fixes the spelling in IIO_BACKEND_INTERNAL_CONTINUOUS_WAVE. Signed-off-by: David Lechner Link: https://patch.msgid.link/20240726-iio-backend-spelling-continuous-v1-1-467c6e3f78ff@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 4e81931703ab..29c4cf0bd761 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -17,7 +17,7 @@ enum iio_backend_data_type { }; enum iio_backend_data_source { - IIO_BACKEND_INTERNAL_CONTINUOS_WAVE, + IIO_BACKEND_INTERNAL_CONTINUOUS_WAVE, IIO_BACKEND_EXTERNAL, IIO_BACKEND_DATA_SOURCE_MAX }; -- cgit v1.2.3 From f44e94afbb340be90100f8a172ebb14a9e717c8b Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 26 Jul 2024 10:23:15 +0200 Subject: iio: core: annotate masklength as __private Now that all users are using the proper accessors, we can mark masklength as __private so that no one tries to write. We also get help from checkers in warning us in case someone does it. To access the private field from IIO core code, we need to use the ACCESS_PRIVATE() macro. 
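In practice drivers keep reading the field through the accessor, while only the IIO core spells out the private access, roughly (the channel handler and new value below are hypothetical):

	unsigned int bit;

	/* drivers read the field only through the accessor */
	for_each_set_bit(bit, indio_dev->active_scan_mask,
			 iio_get_masklength(indio_dev))
		example_handle_channel(indio_dev, bit);

	/* only the IIO core writes it, and must spell that out */
	ACCESS_PRIVATE(indio_dev, masklength) = new_masklength;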
Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240726-dev-iio-masklength-private3-v1-23-82913fc0fb87@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index dd6bbc468283..f6c0499853bb 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -609,7 +609,7 @@ struct iio_dev { int scan_bytes; const unsigned long *available_scan_masks; - unsigned masklength; + unsigned __private masklength; const unsigned long *active_scan_mask; bool scan_timestamp; struct iio_trigger *trig; @@ -861,7 +861,7 @@ static inline const struct iio_scan_type */ static inline unsigned int iio_get_masklength(const struct iio_dev *indio_dev) { - return indio_dev->masklength; + return ACCESS_PRIVATE(indio_dev, masklength); } /** -- cgit v1.2.3 From d092b6869817208326005df0a656e3bb9724d89f Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 31 Jul 2024 09:05:43 +0200 Subject: iio: core: add function to retrieve active_scan_mask index Add a function to retrieve the index of the active scan mask inside the available scan masks array. As in iio_scan_mask_match and iio_sanity_check_avail_scan_masks, this function does not handle multi-long masks correctly. It only checks the first long to be zero, and will use such a mask as a terminator even if there were bits set after the first long. This should be fine since the available_scan_mask has already been sanity tested using iio_sanity_check_avail_scan_masks. See iio_scan_mask_match and iio_sanity_check_avail_scan_masks for more details. Signed-off-by: Julien Stephan Reviewed-by: David Lechner Link: https://patch.msgid.link/20240731-ad7380-add-single-ended-chips-v2-2-cd63bf05744c@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index f6c0499853bb..3a735a9a9eae 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -864,6 +864,8 @@ static inline unsigned int iio_get_masklength(const struct iio_dev *indio_dev) { return ACCESS_PRIVATE(indio_dev, masklength); } +int iio_active_scan_mask_index(struct iio_dev *indio_dev); + /** * iio_for_each_active_channel - Iterated over active channels * @indio_dev: the IIO device -- cgit v1.2.3 From 2256f37e24b1979f9a97de0b76cabbf8544a8aff Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 2 Aug 2024 16:26:59 +0200 Subject: iio: backend: introduce struct iio_backend_info Instead of only passing the backend ops when calling devm_iio_backend_register(), pass an info-like structure that contains the ops and additional information. For now, the backend name is being added as that will be used by the debugFS interface introduced in a later patch. It also opens the door for further customizations passed by backends. All users of devm_iio_backend_register() were updated accordingly.
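A backend driver then registers along these lines (driver names are made up; only the new calling convention is the point):

	static const struct iio_backend_ops example_ops = {
		.enable = example_enable,
		.chan_enable = example_chan_enable,
	};

	static const struct iio_backend_info example_info = {
		.name = "example-backend",
		.ops = &example_ops,
	};

	/* previously: devm_iio_backend_register(dev, &example_ops, st); */
	ret = devm_iio_backend_register(dev, &example_info, st);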
Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240802-dev-iio-backend-add-debugfs-v2-1-4cb62852f0d0@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 29c4cf0bd761..f120fa2e0a43 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -115,6 +115,16 @@ struct iio_backend_ops { const struct iio_chan_spec *chan, char *buf); }; +/** + * struct iio_backend_info - info structure for an iio_backend + * @name: Backend name. + * @ops: Backend operations. + */ +struct iio_backend_info { + const char *name; + const struct iio_backend_ops *ops; +}; + int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan); int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan); int devm_iio_backend_enable(struct device *dev, struct iio_backend *back); @@ -151,6 +161,6 @@ __devm_iio_backend_get_from_fwnode_lookup(struct device *dev, struct fwnode_handle *fwnode); int devm_iio_backend_register(struct device *dev, - const struct iio_backend_ops *ops, void *priv); + const struct iio_backend_info *info, void *priv); #endif -- cgit v1.2.3 From cdf01e0809a4c6c7877ea52401c2a6679df7aed6 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 2 Aug 2024 16:27:00 +0200 Subject: iio: backend: add debugFs interface This adds a basic debugfs interface for backends. Two new ops are being added: * debugfs_reg_access: Analogous to the core IIO one but for backend devices. * debugfs_print_chan_status: One useful usecase for this one is for testing test tones in a digital interface and "ask" the backend to dump more details on why a test tone might have errors. Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240802-dev-iio-backend-add-debugfs-v2-2-4cb62852f0d0@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index f120fa2e0a43..9d0dba7ab9e7 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -22,6 +22,8 @@ enum iio_backend_data_source { IIO_BACKEND_DATA_SOURCE_MAX }; +#define iio_backend_debugfs_ptr(ptr) PTR_IF(IS_ENABLED(CONFIG_DEBUG_FS), ptr) + /** * IIO_BACKEND_EX_INFO - Helper for an IIO extended channel attribute * @_name: Attribute name @@ -81,6 +83,8 @@ enum iio_backend_sample_trigger { * @extend_chan_spec: Extend an IIO channel. * @ext_info_set: Extended info setter. * @ext_info_get: Extended info getter. + * @debugfs_print_chan_status: Print channel status into a buffer. + * @debugfs_reg_access: Read or write register value of backend. 
**/ struct iio_backend_ops { int (*enable)(struct iio_backend *back); @@ -113,6 +117,11 @@ struct iio_backend_ops { const char *buf, size_t len); int (*ext_info_get)(struct iio_backend *back, uintptr_t private, const struct iio_chan_spec *chan, char *buf); + int (*debugfs_print_chan_status)(struct iio_backend *back, + unsigned int chan, char *buf, + size_t len); + int (*debugfs_reg_access)(struct iio_backend *back, unsigned int reg, + unsigned int writeval, unsigned int *readval); }; /** @@ -163,4 +172,9 @@ __devm_iio_backend_get_from_fwnode_lookup(struct device *dev, int devm_iio_backend_register(struct device *dev, const struct iio_backend_info *info, void *priv); +ssize_t iio_backend_debugfs_print_chan_status(struct iio_backend *back, + unsigned int chan, char *buf, + size_t len); +void iio_backend_debugfs_add(struct iio_backend *back, + struct iio_dev *indio_dev); #endif -- cgit v1.2.3 From b001635a7c3014fe7745a507840c5d8f58235c04 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 2 Aug 2024 16:27:01 +0200 Subject: iio: backend: add a modified prbs23 support Support ADI specific prb23 sequence that can be used both for calibrating or debugging digital interfaces. Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240802-dev-iio-backend-add-debugfs-v2-3-4cb62852f0d0@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 9d0dba7ab9e7..2b9d1aa86552 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -56,6 +56,8 @@ enum iio_backend_test_pattern { IIO_BACKEND_NO_TEST_PATTERN, /* modified prbs9 */ IIO_BACKEND_ADI_PRBS_9A = 32, + /* modified prbs23 */ + IIO_BACKEND_ADI_PRBS_23A, IIO_BACKEND_TEST_PATTERN_MAX }; -- cgit v1.2.3 From b6547e54864b998af21fdcaa0a88cf8e7efe641a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Aug 2024 18:12:06 -0700 Subject: runtime constants: deal with old decrepit linkers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runtime constants linker script depended on documented linker behavior [1]: "If an output section’s name is the same as the input section’s name and is representable as a C identifier, then the linker will automatically PROVIDE two symbols: __start_SECNAME and __stop_SECNAME, where SECNAME is the name of the section. These indicate the start address and end address of the output section respectively" to just automatically define the symbol names for the bounds of the runtime constant arrays. It turns out that this isn't actually something we can rely on, with old linkers not generating these automatic symbols. It looks to have been introduced in binutils-2.29 back in 2017, and we still support building with versions all the way back to binutils-2.25 (from 2015). And yes, Oleg actually seems to be using such ancient versions of binutils. So instead of depending on the implicit symbols from "section names match and are representable C identifiers", just do this all manually. It's not like it causes us any extra pain, we already have to do that for all the other sections that we use that often have special characters in them. 
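For context, the symbols in question are the per-section bounds that C code walks at boot; a generic sketch (section and helper names are illustrative, not the actual runtime-constant ones):

	/* provided implicitly by binutils >= 2.29, or explicitly by the
	 * linker script after this change */
	extern unsigned long __start_runtime_example[];
	extern unsigned long __stop_runtime_example[];

	static void example_patch_all(void)
	{
		unsigned long *site;

		for (site = __start_runtime_example;
		     site < __stop_runtime_example; site++)
			example_patch_site(*site);
	}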
Reported-and-tested-by: Oleg Nesterov Link: https://sourceware.org/binutils/docs/ld/Input-Section-Example.html [1] Link: https://lore.kernel.org/all/20240802114518.GA20924@redhat.com/ Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ad6afc5c4918..1ae44793132a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -911,13 +911,12 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) -#define RUNTIME_NAME(t,x) runtime_##t##_##x +#define NAMED_SECTION(name) \ + . = ALIGN(8); \ + name : AT(ADDR(name) - LOAD_OFFSET) \ + { BOUNDED_SECTION_PRE_LABEL(name, name, __start_, __stop_) } -#define RUNTIME_CONST(t,x) \ - . = ALIGN(8); \ - RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \ - *(RUNTIME_NAME(t,x)); \ - } +#define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ -- cgit v1.2.3 From 768e4bb6a75e3c6a034df7c67edac20bd222857e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 31 Jul 2024 13:07:17 -0700 Subject: net: Don't register pernet_operations if only one of id or size is specified. We can allocate per-netns memory for struct pernet_operations by specifying id and size. register_pernet_operations() assigns an id to pernet_operations and later ops_init() allocates the specified size of memory as net->gen->ptr[id]. If id is missing, no memory is allocated. If size is not specified, pernet_operations just wastes an entry of net->gen->ptr[] for every netns. net_generic is available only when both id and size are specified, so let's ensure that. While we are at it, we add const to both fields. Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/net_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 20c34bd7a077..e67b483cc8bb 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -451,8 +451,8 @@ struct pernet_operations { /* Following method is called with RTNL held. */ void (*exit_batch_rtnl)(struct list_head *net_exit_list, struct list_head *dev_kill_list); - unsigned int *id; - size_t size; + unsigned int * const id; + const size_t size; }; /* -- cgit v1.2.3 From d21fe1e9a6a44e752e227d3a43f41aed790dad95 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 18 Jul 2024 10:23:54 +0800 Subject: pinctrl: pinconf-generic: Add support for "input-schmitt-microvolt" property Add "input-schmitt-microvolt" property to generic options used for DT parsing files. This enables drivers, which use generic pin configurations, to get the value passed to this property. 
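A generic-pinconf driver would then handle the new parameter roughly as follows (the chip-specific setter is hypothetical):

	static int example_pin_config_set(struct pinctrl_dev *pctldev, unsigned int pin,
					  unsigned long *configs, unsigned int num_configs)
	{
		unsigned int i;

		for (i = 0; i < num_configs; i++) {
			switch (pinconf_to_config_param(configs[i])) {
			case PIN_CONFIG_INPUT_SCHMITT_UV:
				/* argument is the threshold in microvolts */
				example_set_schmitt_uv(pctldev, pin,
						       pinconf_to_config_argument(configs[i]));
				break;
			default:
				return -ENOTSUPP;
			}
		}

		return 0;
	}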
Signed-off-by: Inochi Amaoto Link: https://lore.kernel.org/IA1PR20MB4953806785BA04E075DC4F03BBAC2@IA1PR20MB4953.namprd20.prod.outlook.com Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index a65d3d078e58..53cfde98433d 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -81,6 +81,8 @@ struct pinctrl_map; * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin. * If the argument != 0, schmitt-trigger mode is enabled. If it's 0, * schmitt-trigger mode is disabled. + * @PIN_CONFIG_INPUT_SCHMITT_UV: this will configure an input pin to run in + * schmitt-trigger mode. The argument is in uV. * @PIN_CONFIG_MODE_LOW_POWER: this will configure the pin for low power * operation, if several modes of operation are supported these can be * passed in the argument on a custom form, else just use argument 1 @@ -132,6 +134,7 @@ enum pin_config_param { PIN_CONFIG_INPUT_ENABLE, PIN_CONFIG_INPUT_SCHMITT, PIN_CONFIG_INPUT_SCHMITT_ENABLE, + PIN_CONFIG_INPUT_SCHMITT_UV, PIN_CONFIG_MODE_LOW_POWER, PIN_CONFIG_MODE_PWM, PIN_CONFIG_OUTPUT, -- cgit v1.2.3 From f17c06c6608ad4ecd2ccf321753fb511812d821b Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 2 Aug 2024 16:22:14 +0100 Subject: i2c: Fix conditional for substituting empty ACPI functions Add IS_ENABLED(CONFIG_I2C) to the conditional around a bunch of ACPI functions. The conditional around these functions depended only on CONFIG_ACPI. But the functions are implemented in I2C core, so are only present if CONFIG_I2C is enabled. Signed-off-by: Richard Fitzgerald Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 07e33bbc9256..7eedd0c662da 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); -- cgit v1.2.3 From b88f55389ad27f05ed84af9e1026aa64dbfabc9a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Aug 2024 18:48:10 +0900 Subject: profiling: remove profile=sleep support The kernel sleep profile is no longer working due to a recursive locking bug introduced by commit 42a20f86dc19 ("sched: Add wrapper for get_wchan() to keep task blocked") Booting with the 'profile=sleep' kernel command line option added or executing # echo -n sleep > /sys/kernel/profiling after boot causes the system to lock up. 
Lockdep reports kthreadd/3 is trying to acquire lock: ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: get_wchan+0x32/0x70 but task is already holding lock: ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: try_to_wake_up+0x53/0x370 with the call trace being lock_acquire+0xc8/0x2f0 get_wchan+0x32/0x70 __update_stats_enqueue_sleeper+0x151/0x430 enqueue_entity+0x4b0/0x520 enqueue_task_fair+0x92/0x6b0 ttwu_do_activate+0x73/0x140 try_to_wake_up+0x213/0x370 swake_up_locked+0x20/0x50 complete+0x2f/0x40 kthread+0xfb/0x180 However, since nobody noticed this regression for more than two years, let's remove 'profile=sleep' support based on the assumption that nobody needs this functionality. Fixes: 42a20f86dc19 ("sched: Add wrapper for get_wchan() to keep task blocked") Cc: stable@vger.kernel.org # v5.16+ Signed-off-by: Tetsuo Handa Signed-off-by: Linus Torvalds --- include/linux/profile.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/profile.h b/include/linux/profile.h index 2fb487f61d12..3f53cdb0c27c 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -10,7 +10,6 @@ #define CPU_PROFILING 1 #define SCHED_PROFILING 2 -#define SLEEP_PROFILING 3 #define KVM_PROFILING 4 struct proc_dir_entry; -- cgit v1.2.3 From 0e8b53979ac86eddb3fd76264025a70071a25574 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 5 Aug 2024 14:01:21 +0900 Subject: bpf: kprobe: remove unused declaring of bpf_kprobe_override After the commit 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one"), "bpf_kprobe_override" is not used anywhere anymore, and we can remove it now. Link: https://lore.kernel.org/all/20240710085939.11520-1-dongml2@chinatelecom.cn/ Fixes: 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one") Signed-off-by: Menglong Dong Acked-by: Jiri Olsa Signed-off-by: Masami Hiramatsu (Google) --- include/linux/trace_events.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df3e2973626..9435185c10ef 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -880,7 +880,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -- cgit v1.2.3 From 613f21505b25a4f43f33de00f11afc059bedde2b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 4 Jul 2024 11:01:51 +0200 Subject: media: cec: core: add new CEC_MSG_FL_REPLY_VENDOR_ID flag If this flag is set, then the reply is expected to consist of the CEC_MSG_VENDOR_COMMAND_WITH_ID opcode followed by the Vendor ID (as used in bytes 1-4 of the message), followed by the struct cec_msg reply field. Note that this assumes that the byte after the Vendor ID is a vendor-specific opcode. This flag makes it easier to wait for replies to vendor commands, using the same CEC framework support for waiting for regular replies. Support for this flag is indicated by setting the new CEC_CAP_REPLY_VENDOR_ID capability. 
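From userspace, the flag would be used roughly like this (sketch only; the vendor-specific opcode, timeout and payload handling are made up):

	struct cec_msg msg = {};

	cec_msg_init(&msg, CEC_LOG_ADDR_PLAYBACK_1, CEC_LOG_ADDR_TV);
	/* append the CEC_MSG_VENDOR_COMMAND_WITH_ID payload here ... */
	msg.flags = CEC_MSG_FL_REPLY_VENDOR_ID;
	msg.reply = 0x42;	/* vendor-specific opcode expected after the Vendor ID */
	msg.timeout = 1000;	/* ms */

	if (ioctl(fd, CEC_TRANSMIT, &msg) == 0 &&
	    (msg.rx_status & CEC_RX_STATUS_OK))
		/* msg now holds the matching vendor reply */
		example_handle_reply(&msg);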
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 2 ++ include/uapi/linux/cec.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index d131514032f2..07d2ee8a3904 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -66,6 +66,8 @@ struct cec_data { struct list_head xfer_list; struct cec_adapter *adap; struct cec_msg msg; + u8 match_len; + u8 match_reply[5]; struct cec_fh *fh; struct delayed_work work; struct completion c; diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index b8e071abaea5..894fffc66f2c 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -165,6 +165,7 @@ static inline int cec_msg_recv_is_rx_result(const struct cec_msg *msg) /* cec_msg flags field */ #define CEC_MSG_FL_REPLY_TO_FOLLOWERS (1 << 0) #define CEC_MSG_FL_RAW (1 << 1) +#define CEC_MSG_FL_REPLY_VENDOR_ID (1 << 2) /* cec_msg tx/rx_status field */ #define CEC_TX_STATUS_OK (1 << 0) @@ -339,6 +340,8 @@ static inline int cec_is_unconfigured(__u16 log_addr_mask) #define CEC_CAP_MONITOR_PIN (1 << 7) /* CEC_ADAP_G_CONNECTOR_INFO is available */ #define CEC_CAP_CONNECTOR_INFO (1 << 8) +/* CEC_MSG_FL_REPLY_VENDOR_ID is available */ +#define CEC_CAP_REPLY_VENDOR_ID (1 << 9) /** * struct cec_caps - CEC capabilities structure. -- cgit v1.2.3 From d7bdb8e6aabe218fd980768a1486434e42761539 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 27 May 2024 16:25:51 +0200 Subject: pmdomain: core: Enable s2idle for CPU PM domains on PREEMPT_RT To allow a genpd provider for a CPU PM domain to enter a domain-idle-state during s2idle on a PREEMPT_RT based configuration, we can't use the regular spinlock, as they are turned into sleepable locks on PREEMPT_RT. To address this problem, let's convert into using the raw spinlock, but only for genpd providers that have the GENPD_FLAG_CPU_DOMAIN bit set. In this way, the lock can still be acquired/released in atomic context, which is needed in the idle-path for PREEMPT_RT. Do note that the genpd power-on/off notifiers may also be fired during s2idle, but these are already prepared for PREEMPT_RT as they are based on the raw notifiers. However, consumers of them may need to adopt accordingly to work properly on PREEMPT_RT. Signed-off-by: Ulf Hansson Tested-by: Raghavendra Kakarla # qcm6490 with PREEMPT_RT set Acked-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20240527142557.321610-2-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 858c8e7851fb..b86bb52858ac 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -198,8 +198,11 @@ struct generic_pm_domain { spinlock_t slock; unsigned long lock_flags; }; + struct { + raw_spinlock_t raw_slock; + unsigned long raw_lock_flags; + }; }; - }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) -- cgit v1.2.3 From d5934e76316e84eced836b6b2bafae1837d1cd58 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Mar 2024 16:48:48 -0700 Subject: cleanup: Add usage and style documentation When proposing that PCI grow some new cleanup helpers for pci_dev_put() and pci_dev_{lock,unlock} [1], Bjorn had some fundamental questions about expectations and best practices. Upon reviewing an updated changelog with those details he recommended adding them to documentation in the header file itself. 
Add that documentation and link it into the rendering for Documentation/core-api/. Signed-off-by: Dan Williams Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Jonathan Cameron Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/171175585714.2192972.12661675876300167762.stgit@dwillia2-xfh.jf.intel.com --- include/linux/cleanup.h | 136 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index d9e613803df1..9c6b4f2c0176 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -4,6 +4,142 @@ #include +/** + * DOC: scope-based cleanup helpers + * + * The "goto error" pattern is notorious for introducing subtle resource + * leaks. It is tedious and error prone to add new resource acquisition + * constraints into code paths that already have several unwind + * conditions. The "cleanup" helpers enable the compiler to help with + * this tedium and can aid in maintaining LIFO (last in first out) + * unwind ordering to avoid unintentional leaks. + * + * As drivers make up the majority of the kernel code base, here is an + * example of using these helpers to clean up PCI drivers. The target of + * the cleanups are occasions where a goto is used to unwind a device + * reference (pci_dev_put()), or unlock the device (pci_dev_unlock()) + * before returning. + * + * The DEFINE_FREE() macro can arrange for PCI device references to be + * dropped when the associated variable goes out of scope:: + * + * DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T)) + * ... + * struct pci_dev *dev __free(pci_dev_put) = + * pci_get_slot(parent, PCI_DEVFN(0, 0)); + * + * The above will automatically call pci_dev_put() if @dev is non-NULL + * when @dev goes out of scope (automatic variable scope). If a function + * wants to invoke pci_dev_put() on error, but return @dev (i.e. without + * freeing it) on success, it can do:: + * + * return no_free_ptr(dev); + * + * ...or:: + * + * return_ptr(dev); + * + * The DEFINE_GUARD() macro can arrange for the PCI device lock to be + * dropped when the scope where guard() is invoked ends:: + * + * DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) + * ... + * guard(pci_dev)(dev); + * + * The lifetime of the lock obtained by the guard() helper follows the + * scope of automatic variable declaration. Take the following example:: + * + * func(...) + * { + * if (...) { + * ... + * guard(pci_dev)(dev); // pci_dev_lock() invoked here + * ... + * } // <- implied pci_dev_unlock() triggered here + * } + * + * Observe the lock is held for the remainder of the "if ()" block not + * the remainder of "func()". + * + * Now, when a function uses both __free() and guard(), or multiple + * instances of __free(), the LIFO order of variable definition order + * matters. GCC documentation says: + * + * "When multiple variables in the same scope have cleanup attributes, + * at exit from the scope their associated cleanup functions are run in + * reverse order of definition (last defined, first cleanup)." + * + * When the unwind order matters it requires that variables be defined + * mid-function scope rather than at the top of the file. 
Take the + * following example and notice the bug highlighted by "!!":: + * + * LIST_HEAD(list); + * DEFINE_MUTEX(lock); + * + * struct object { + * struct list_head node; + * }; + * + * static struct object *alloc_add(void) + * { + * struct object *obj; + * + * lockdep_assert_held(&lock); + * obj = kzalloc(sizeof(*obj), GFP_KERNEL); + * if (obj) { + * LIST_HEAD_INIT(&obj->node); + * list_add(obj->node, &list): + * } + * return obj; + * } + * + * static void remove_free(struct object *obj) + * { + * lockdep_assert_held(&lock); + * list_del(&obj->node); + * kfree(obj); + * } + * + * DEFINE_FREE(remove_free, struct object *, if (_T) remove_free(_T)) + * static int init(void) + * { + * struct object *obj __free(remove_free) = NULL; + * int err; + * + * guard(mutex)(&lock); + * obj = alloc_add(); + * + * if (!obj) + * return -ENOMEM; + * + * err = other_init(obj); + * if (err) + * return err; // remove_free() called without the lock!! + * + * no_free_ptr(obj); + * return 0; + * } + * + * That bug is fixed by changing init() to call guard() and define + + * initialize @obj in this order:: + * + * guard(mutex)(&lock); + * struct object *obj __free(remove_free) = alloc_add(); + * + * Given that the "__free(...) = NULL" pattern for variables defined at + * the top of the function poses this potential interdependency problem + * the recommendation is to always define and assign variables in one + * statement and not group variable definitions at the top of the + * function when __free() is used. + * + * Lastly, given that the benefit of cleanup helpers is removal of + * "goto", and that the "goto" statement can jump between scopes, the + * expectation is that usage of "goto" and cleanup helpers is never + * mixed in the same function. I.e. for a given routine, convert all + * resources that need a "goto" cleanup to scope-based cleanup, or + * convert none of them. + */ + /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() -- cgit v1.2.3 From a720dee5e039238a44c0142dfccdc0e35c1125f7 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Sat, 13 Jul 2024 19:47:33 +1200 Subject: hid-asus: use hid for brightness control on keyboard On almost all ASUS ROG series laptops the MCU used for the USB keyboard also has a HID packet used for setting the brightness. This is usually the same as the WMI method. But in some laptops the WMI method either is missing or doesn't work, so we should default to the HID control. Signed-off-by: Luke D. 
Jones Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20240713074733.77334-2-luke@ljones.dev Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/asus-wmi.h | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 5e6f407b2def..b601b245a035 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -4,6 +4,7 @@ #include #include +#include /* WMI Methods */ #define ASUS_WMI_METHODID_SPEC 0x43455053 /* BIOS SPECification */ @@ -165,4 +166,39 @@ static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, } #endif +/* To be used by both hid-asus and asus-wmi to determine which controls kbd_brightness */ +static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { + { + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Zephyrus"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Strix"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Flow"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA403U"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GU605M"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC71L"), + }, + }, + { }, +}; + #endif /* __PLATFORM_DATA_X86_ASUS_WMI_H */ -- cgit v1.2.3 From e42066df07c0fcedebb32ed56f8bc39b4bf86337 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 5 Aug 2024 15:08:39 +0100 Subject: ASoC: cs35l56: Handle OTP read latency over SoundWire Use the late-read buffer in the CS35L56 SoundWire interface to read OTP memory. The OTP memory has a longer access latency than chip registers and cannot guarantee to return the data value in the SoundWire control response if the bus clock is >4.8 MHz. The Cirrus SoundWire peripheral IP exposes the bridge-to-bus read buffer and status bits. For a read from OTP the bridge status bits are polled to wait for the OTP data to be loaded into the read buffer and the data is then read from there. Signed-off-by: Richard Fitzgerald Fixes: e1830f66f6c6 ("ASoC: cs35l56: Add helper functions for amp calibration") Link: https://patch.msgid.link/20240805140839.26042-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index a6aa112e5741..a51acefa785f 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -277,6 +277,11 @@ static inline int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_ba return 0; } +static inline bool cs35l56_is_otp_register(unsigned int reg) +{ + return (reg >> 16) == 3; +} + extern struct regmap_config cs35l56_regmap_i2c; extern struct regmap_config cs35l56_regmap_spi; extern struct regmap_config cs35l56_regmap_sdw; -- cgit v1.2.3 From 25162a4f64f8ba0065f300977589fe1f6af332f0 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 4 Aug 2024 17:16:25 -0700 Subject: Input: cyttsp4 - remove driver The cyttsp4 touchscreen driver was contributed in 2013 and since then has seen no updates. The driver uses platform data (no device tree support) and there are no users of it in the mainline kernel. There were occasional fixes to it for issues either found by static code analysis tools or via visual inspection, but otherwise the driver is completely untested. Remove the driver. 
Reviewed-by: Linus Walleij Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/ZrAZ2cUow_z838tp@google.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/cyttsp4.h | 62 ----------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 include/linux/platform_data/cyttsp4.h (limited to 'include') diff --git a/include/linux/platform_data/cyttsp4.h b/include/linux/platform_data/cyttsp4.h deleted file mode 100644 index 5dc9d2be384b..000000000000 --- a/include/linux/platform_data/cyttsp4.h +++ /dev/null @@ -1,62 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Header file for: - * Cypress TrueTouch(TM) Standard Product (TTSP) touchscreen drivers. - * For use with Cypress Txx3xx parts. - * Supported parts include: - * CY8CTST341 - * CY8CTMA340 - * - * Copyright (C) 2009, 2010, 2011 Cypress Semiconductor, Inc. - * Copyright (C) 2012 Javier Martinez Canillas - * - * Contact Cypress Semiconductor at www.cypress.com (kev@cypress.com) - */ -#ifndef _CYTTSP4_H_ -#define _CYTTSP4_H_ - -#define CYTTSP4_MT_NAME "cyttsp4_mt" -#define CYTTSP4_I2C_NAME "cyttsp4_i2c_adapter" -#define CYTTSP4_SPI_NAME "cyttsp4_spi_adapter" - -#define CY_TOUCH_SETTINGS_MAX 32 - -struct touch_framework { - const uint16_t *abs; - uint8_t size; - uint8_t enable_vkeys; -} __packed; - -struct cyttsp4_mt_platform_data { - struct touch_framework *frmwrk; - unsigned short flags; - char const *inp_dev_name; -}; - -struct touch_settings { - const uint8_t *data; - uint32_t size; - uint8_t tag; -} __packed; - -struct cyttsp4_core_platform_data { - int irq_gpio; - int rst_gpio; - int level_irq_udelay; - int (*xres)(struct cyttsp4_core_platform_data *pdata, - struct device *dev); - int (*init)(struct cyttsp4_core_platform_data *pdata, - int on, struct device *dev); - int (*power)(struct cyttsp4_core_platform_data *pdata, - int on, struct device *dev, atomic_t *ignore_irq); - int (*irq_stat)(struct cyttsp4_core_platform_data *pdata, - struct device *dev); - struct touch_settings *sett[CY_TOUCH_SETTINGS_MAX]; -}; - -struct cyttsp4_platform_data { - struct cyttsp4_core_platform_data *core_pdata; - struct cyttsp4_mt_platform_data *mt_pdata; -}; - -#endif /* _CYTTSP4_H_ */ -- cgit v1.2.3 From f91f7ac900e7342e0fd66093dfbf7cb8cb585a99 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Wed, 17 Jul 2024 15:00:23 +0200 Subject: refcount: Report UAF for refcount_sub_and_test(0) when counter==0 When a reference counter is at zero and refcount_sub_and_test() is invoked to subtract zero, the function accepts this request without any warning and returns true. This behavior does not seem ideal because the counter being already at zero indicates a use-after-free. Furthermore, returning true by refcount_sub_and_test() in this case potentially results in a double-free done by its caller. Modify the underlying function __refcount_sub_and_test() to warn about this case as a use-after-free and have it return false to avoid the potential double-free. 
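For illustration, a minimal sketch of the behavioural change (hypothetical sequence, not part of the patch):

    refcount_t r = REFCOUNT_INIT(1);

    refcount_sub_and_test(1, &r);   /* old == 1, hits zero, returns true:
                                     * the owner legitimately frees        */
    refcount_sub_and_test(0, &r);   /* old == 0: used to return true and
                                     * invite a double free; now it warns
                                     * REFCOUNT_SUB_UAF and returns false  */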
Signed-off-by: Petr Pavlu Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240717130023.5675-1-petr.pavlu@suse.com Signed-off-by: Kees Cook --- include/linux/refcount.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 59b3b752394d..35f039ecb272 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -266,12 +266,12 @@ bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) if (oldp) *oldp = old; - if (old == i) { + if (old > 0 && old == i) { smp_acquire__after_ctrl_dep(); return true; } - if (unlikely(old < 0 || old - i < 0)) + if (unlikely(old <= 0 || old - i < 0)) refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; -- cgit v1.2.3 From a2dc7bee4f77266ebb8e3a8544fc5f7812835f8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:25 +0000 Subject: inet: constify inet_sk_bound_dev_eq() net parameter inet_sk_bound_dev_eq() and its callers do not modify the net structure. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240802134029.3748005-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet6_hashtables.h | 2 +- include/net/inet_hashtables.h | 2 +- include/net/inet_sock.h | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 533a7337865a..591cbf5e4d5f 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -175,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, int inet6_hash(struct sock *sk); -static inline bool inet6_match(struct net *net, const struct sock *sk, +static inline bool inet6_match(const struct net *net, const struct sock *sk, const struct in6_addr *saddr, const struct in6_addr *daddr, const __portpair ports, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 7f1b38458743..1cc8b7ca20a1 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -351,7 +351,7 @@ static inline struct sock *inet_lookup_listener(struct net *net, ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ -static inline bool inet_match(struct net *net, const struct sock *sk, +static inline bool inet_match(const struct net *net, const struct sock *sk, const __addrpair cookie, const __portpair ports, int dif, int sdif) { diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f9ddd47dc4f8..394c3b66065e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -150,7 +150,8 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, return bound_dev_if == dif || bound_dev_if == sdif; } -static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, +static inline bool inet_sk_bound_dev_eq(const struct net *net, + int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -- cgit v1.2.3 From d4433e8b405a882fa2ef29601c4ad262ba6e5526 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:26 +0000 Subject: inet: constify 'struct net' parameter of various lookup helpers Following helpers do not touch their struct net argument: - bpf_sk_lookup_run_v4() - inet_lookup_reuseport() - inet_lhash2_lookup() - inet_lookup_run_sk_lookup() - __inet_lookup_listener() - __inet_lookup_established() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: 
https://patch.msgid.link/20240802134029.3748005-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 2 +- include/net/inet_hashtables.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index b6672ff61407..4acd1da4dac6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1616,7 +1616,7 @@ extern struct static_key_false bpf_sk_lookup_enabled; _all_pass || _selected_sk ? SK_PASS : SK_DROP; \ }) -static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, +static inline bool bpf_sk_lookup_run_v4(const struct net *net, int protocol, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 dport, const int ifindex, struct sock **psk) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1cc8b7ca20a1..5eea47f135a4 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -304,7 +304,7 @@ int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); -struct sock *__inet_lookup_listener(struct net *net, +struct sock *__inet_lookup_listener(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, @@ -368,7 +368,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk, /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM */ -struct sock *__inet_lookup_established(struct net *net, +struct sock *__inet_lookup_established(const struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, @@ -382,13 +382,13 @@ inet_ehashfn_t inet_ehashfn; INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn); -struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk, +struct sock *inet_lookup_reuseport(const struct net *net, struct sock *sk, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, __be32 daddr, unsigned short hnum, inet_ehashfn_t *ehashfn); -struct sock *inet_lookup_run_sk_lookup(struct net *net, +struct sock *inet_lookup_run_sk_lookup(const struct net *net, int protocol, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, -- cgit v1.2.3 From 9a2fa1472083580b6c66bdaf291f591e1170123a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Aug 2024 18:02:00 -0400 Subject: fix bitmap corruption on close_range() with CLOSE_RANGE_UNSHARE copy_fd_bitmaps(new, old, count) is expected to copy the first count/BITS_PER_LONG bits from old->full_fds_bits[] and fill the rest with zeroes. What it does is copying enough words (BITS_TO_LONGS(count/BITS_PER_LONG)), then memsets the rest. That works fine, *if* all bits past the cutoff point are clear. Otherwise we are risking garbage from the last word we'd copied. For most of the callers that is true - expand_fdtable() has count equal to old->max_fds, so there's no open descriptors past count, let alone fully occupied words in ->open_fds[], which is what bits in ->full_fds_bits[] correspond to. The other caller (dup_fd()) passes sane_fdtable_size(old_fdt, max_fds), which is the smallest multiple of BITS_PER_LONG that covers all opened descriptors below max_fds. In the common case (copying on fork()) max_fds is ~0U, so all opened descriptors will be below it and we are fine, by the same reasons why the call in expand_fdtable() is safe. 
Unfortunately, there is a case where max_fds is less than that and where we might, indeed, end up with junk in ->full_fds_bits[] - close_range(from, to, CLOSE_RANGE_UNSHARE) with * descriptor table being currently shared * 'to' being above the current capacity of descriptor table * 'from' being just under some chunk of opened descriptors. In that case we end up with observably wrong behaviour - e.g. spawn a child with CLONE_FILES, get all descriptors in range 0..127 open, then close_range(64, ~0U, CLOSE_RANGE_UNSHARE) and watch dup(0) ending up with descriptor #128, despite #64 being observably not open. The minimally invasive fix would be to deal with that in dup_fd(). If this proves to add measurable overhead, we can go that way, but let's try to fix copy_fd_bitmaps() first. * new helper: bitmap_copy_and_expand(to, from, bits_to_copy, size). * make copy_fd_bitmaps() take the bitmap size in words, rather than bits; it's 'count' argument is always a multiple of BITS_PER_LONG, so we are not losing any information, and that way we can use the same helper for all three bitmaps - compiler will see that count is a multiple of BITS_PER_LONG for the large ones, so it'll generate plain memcpy()+memset(). Reproducer added to tools/testing/selftests/core/close_range_test.c Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/linux/bitmap.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 8c4768c44a01..d3b66d77df7a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -270,6 +270,18 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst, dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } +static inline void bitmap_copy_and_extend(unsigned long *to, + const unsigned long *from, + unsigned int count, unsigned int size) +{ + unsigned int copy = BITS_TO_LONGS(count); + + memcpy(to, from, copy * sizeof(long)); + if (count % BITS_PER_LONG) + to[copy - 1] &= BITMAP_LAST_WORD_MASK(count); + memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long)); +} + /* * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64 * machines the order of hi and lo parts of numbers match the bitmap structure. -- cgit v1.2.3 From b9abcbb1239cd782f76532397a7c45458de9a73e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:27 +0000 Subject: udp: constify 'struct net' parameter of socket lookups Following helpers do not touch their 'struct net' argument. 
- udp_sk_bound_dev_eq() - udp4_lib_lookup() - __udp4_lib_lookup() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240802134029.3748005-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index c4e05b14b648..a0217e3cfe4f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -79,7 +79,8 @@ struct udp_table { extern struct udp_table udp_table; void udp_table_init(struct udp_table *, const char *); static inline struct udp_hslot *udp_hashslot(struct udp_table *table, - struct net *net, unsigned int num) + const struct net *net, + unsigned int num) { return &table->hash[udp_hashfn(net, num, table->mask)]; } @@ -245,7 +246,7 @@ static inline int udp_rqueue_get(struct sock *sk) return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit); } -static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, +static inline bool udp_sk_bound_dev_eq(const struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) @@ -296,9 +297,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, int udp_lib_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen, int (*push_pending_frames)(struct sock *)); -struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, +struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); -struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, +struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, + __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, -- cgit v1.2.3 From 10b2a44ccb0cdf85480b6f73a23530aa3ac722de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:28 +0000 Subject: inet6: constify 'struct net' parameter of various lookup helpers Following helpers do not touch their struct net argument: - bpf_sk_lookup_run_v6() - __inet6_lookup_established() - inet6_lookup_reuseport() - inet6_lookup_listener() - inet6_lookup_run_sk_lookup() - __inet6_lookup() - inet6_lookup() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240802134029.3748005-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 2 +- include/net/inet6_hashtables.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4acd1da4dac6..64e1506fefb8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1653,7 +1653,7 @@ static inline bool bpf_sk_lookup_run_v4(const struct net *net, int protocol, } #if IS_ENABLED(CONFIG_IPV6) -static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, +static inline bool bpf_sk_lookup_run_v6(const struct net *net, int protocol, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 591cbf5e4d5f..74dd90ff5f12 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -40,7 +40,7 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash, * * The sockhash lock must be held as a reader here. 
*/ -struct sock *__inet6_lookup_established(struct net *net, +struct sock *__inet6_lookup_established(const struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, @@ -56,7 +56,7 @@ inet6_ehashfn_t inet6_ehashfn; INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn); -struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, +struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk, struct sk_buff *skb, int doff, const struct in6_addr *saddr, __be16 sport, @@ -64,7 +64,7 @@ struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, unsigned short hnum, inet6_ehashfn_t *ehashfn); -struct sock *inet6_lookup_listener(struct net *net, +struct sock *inet6_lookup_listener(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, @@ -73,7 +73,7 @@ struct sock *inet6_lookup_listener(struct net *net, const unsigned short hnum, const int dif, const int sdif); -struct sock *inet6_lookup_run_sk_lookup(struct net *net, +struct sock *inet6_lookup_run_sk_lookup(const struct net *net, int protocol, struct sk_buff *skb, int doff, const struct in6_addr *saddr, @@ -82,7 +82,7 @@ struct sock *inet6_lookup_run_sk_lookup(struct net *net, const u16 hnum, const int dif, inet6_ehashfn_t *ehashfn); -static inline struct sock *__inet6_lookup(struct net *net, +static inline struct sock *__inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, @@ -167,7 +167,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, iif, sdif, refcounted); } -struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, +struct sock *inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, -- cgit v1.2.3 From 87d973e8ddeeddf1777e6689df86d5d369cbcbb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:29 +0000 Subject: ipv6: udp: constify 'struct net' parameter of socket lookups Following helpers do not touch their 'struct net' argument. 
- udp6_lib_lookup() - __udp6_lib_lookup() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240802134029.3748005-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ipv6_stubs.h | 2 +- include/net/udp.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 11cefd50704d..8a3465c8c2c5 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -82,7 +82,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly; struct ipv6_bpf_stub { int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, u32 flags); - struct sock *(*udp6_lib_lookup)(struct net *net, + struct sock *(*udp6_lib_lookup)(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, diff --git a/include/net/udp.h b/include/net/udp.h index a0217e3cfe4f..5ca53b1cec67 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -305,11 +305,11 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); -struct sock *udp6_lib_lookup(struct net *net, +struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif); -struct sock *__udp6_lib_lookup(struct net *net, +struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, -- cgit v1.2.3 From 7e45c1e9edc0004b914e2d5a33245908a06f072c Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 30 Jul 2024 16:40:52 +0300 Subject: net/mlx5: Add support for MTPTM and MTCTR registers Make Management Precision Time Measurement (MTPTM) register and Management Cross Timestamp (MTCTR) register usable in mlx5 driver. 
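For illustration, a hedged sketch of how a caller might issue a cross-timestamp read through the new MTCTR register (the call site below is assumed rather than taken from this series; it relies on the mtctr_reg layout added in the hunks that follow):

    /* mdev: an assumed struct mlx5_core_dev * */
    u32 in[MLX5_ST_SZ_DW(mtctr_reg)] = {};
    u32 out[MLX5_ST_SZ_DW(mtctr_reg)] = {};
    u64 host_ts, device_ts;
    int err;

    MLX5_SET(mtctr_reg, in, first_clock_timestamp_request,
             MLX5_MTCTR_REQUEST_PTM_ROOT_CLOCK);
    MLX5_SET(mtctr_reg, in, second_clock_timestamp_request,
             MLX5_MTCTR_REQUEST_REAL_TIME_CLOCK);

    err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
                               MLX5_REG_MTCTR, 0, 0);
    if (!err && MLX5_GET(mtctr_reg, out, first_clock_valid) &&
        MLX5_GET(mtctr_reg, out, second_clock_valid)) {
            host_ts   = MLX5_GET64(mtctr_reg, out, first_clock_timestamp);
            device_ts = MLX5_GET64(mtctr_reg, out, second_clock_timestamp);
    }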
Signed-off-by: Rahul Rameshbabu Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Tested-by: Vadim Fedorenko Link: https://patch.msgid.link/20240730134055.1835261-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/device.h | 7 ++++++- include/linux/mlx5/driver.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ba875a619b97..a94bc9e3af96 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1243,7 +1243,8 @@ enum mlx5_pcam_feature_groups { enum mlx5_mcam_reg_groups { MLX5_MCAM_REGS_FIRST_128 = 0x0, MLX5_MCAM_REGS_0x9100_0x917F = 0x2, - MLX5_MCAM_REGS_NUM = 0x3, + MLX5_MCAM_REGS_0x9180_0x91FF = 0x3, + MLX5_MCAM_REGS_NUM = 0x4, }; enum mlx5_mcam_feature_groups { @@ -1392,6 +1393,10 @@ enum mlx5_qcam_feature_groups { MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9100_0x917F], \ mng_access_reg_cap_mask.access_regs2.reg) +#define MLX5_CAP_MCAM_REG3(mdev, reg) \ + MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9180_0x91FF], \ + mng_access_reg_cap_mask.access_regs3.reg) + #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a96438ded15f..9f42834f57c5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -159,6 +159,8 @@ enum { MLX5_REG_MSECQ = 0x9155, MLX5_REG_MSEES = 0x9156, MLX5_REG_MIRC = 0x9162, + MLX5_REG_MTPTM = 0x9180, + MLX5_REG_MTCTR = 0x9181, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, MLX5_REG_DTOR = 0xC00E, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cab228cf51c6..234ad6f16e92 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10401,6 +10401,18 @@ struct mlx5_ifc_mcam_access_reg_bits2 { u8 regs_31_to_0[0x20]; }; +struct mlx5_ifc_mcam_access_reg_bits3 { + u8 regs_127_to_96[0x20]; + + u8 regs_95_to_64[0x20]; + + u8 regs_63_to_32[0x20]; + + u8 regs_31_to_2[0x1e]; + u8 mtctr[0x1]; + u8 mtptm[0x1]; +}; + struct mlx5_ifc_mcam_reg_bits { u8 reserved_at_0[0x8]; u8 feature_group[0x8]; @@ -10413,6 +10425,7 @@ struct mlx5_ifc_mcam_reg_bits { struct mlx5_ifc_mcam_access_reg_bits access_regs; struct mlx5_ifc_mcam_access_reg_bits1 access_regs1; struct mlx5_ifc_mcam_access_reg_bits2 access_regs2; + struct mlx5_ifc_mcam_access_reg_bits3 access_regs3; u8 reserved_at_0[0x80]; } mng_access_reg_cap_mask; @@ -11166,6 +11179,34 @@ struct mlx5_ifc_mtmp_reg_bits { u8 sensor_name_lo[0x20]; }; +struct mlx5_ifc_mtptm_reg_bits { + u8 reserved_at_0[0x10]; + u8 psta[0x1]; + u8 reserved_at_11[0xf]; + + u8 reserved_at_20[0x60]; +}; + +enum { + MLX5_MTCTR_REQUEST_NOP = 0x0, + MLX5_MTCTR_REQUEST_PTM_ROOT_CLOCK = 0x1, + MLX5_MTCTR_REQUEST_FREE_RUNNING_COUNTER = 0x2, + MLX5_MTCTR_REQUEST_REAL_TIME_CLOCK = 0x3, +}; + +struct mlx5_ifc_mtctr_reg_bits { + u8 first_clock_timestamp_request[0x8]; + u8 second_clock_timestamp_request[0x8]; + u8 reserved_at_10[0x10]; + + u8 first_clock_valid[0x1]; + u8 second_clock_valid[0x1]; + u8 reserved_at_22[0x1e]; + + u8 first_clock_timestamp[0x40]; + u8 second_clock_timestamp[0x40]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -11230,6 
+11271,8 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mrtc_reg_bits mrtc_reg; struct mlx5_ifc_mtcap_reg_bits mtcap_reg; struct mlx5_ifc_mtmp_reg_bits mtmp_reg; + struct mlx5_ifc_mtptm_reg_bits mtptm_reg; + struct mlx5_ifc_mtctr_reg_bits mtctr_reg; u8 reserved_at_0[0x60e0]; }; -- cgit v1.2.3 From c114e9948c2b6a0b400266e59cc656b59e795bca Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Thu, 18 Jul 2024 11:27:24 -0700 Subject: coredump: Standartize and fix logging The coredump code does not log the process ID and the comm consistently, logs unescaped comm when it does log it, and does not always use the ratelimited logging. That makes it harder to analyze logs and puts the system at the risk of spamming the system log incase something crashes many times over and over again. Fix that by logging TGID and comm (escaped) consistently and using the ratelimited logging always. Signed-off-by: Roman Kisel Tested-by: Allen Pais Link: https://lore.kernel.org/r/20240718182743.1959160-2-romank@linux.microsoft.com Signed-off-by: Kees Cook --- include/linux/coredump.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 0904ba010341..45e598fe3476 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -43,8 +43,30 @@ extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); extern void do_coredump(const kernel_siginfo_t *siginfo); + +/* + * Logging for the coredump code, ratelimited. + * The TGID and comm fields are added to the message. + */ + +#define __COREDUMP_PRINTK(Level, Format, ...) \ + do { \ + char comm[TASK_COMM_LEN]; \ + \ + get_task_comm(comm, current); \ + printk_ratelimited(Level "coredump: %d(%*pE): " Format "\n", \ + task_tgid_vnr(current), (int)strlen(comm), comm, ##__VA_ARGS__); \ + } while (0) \ + +#define coredump_report(fmt, ...) __COREDUMP_PRINTK(KERN_INFO, fmt, ##__VA_ARGS__) +#define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) + #else static inline void do_coredump(const kernel_siginfo_t *siginfo) {} + +#define coredump_report(...) +#define coredump_report_failure(...) + #endif #if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL) -- cgit v1.2.3 From fb97d2eb542faf19a8725afbd75cbc2518903210 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Thu, 18 Jul 2024 11:27:25 -0700 Subject: binfmt_elf, coredump: Log the reason of the failed core dumps Missing, failed, or corrupted core dumps might impede crash investigations. To improve reliability of that process and consequently the programs themselves, one needs to trace the path from producing a core dumpfile to analyzing it. That path starts from the core dump file written to the disk by the kernel or to the standard input of a user mode helper program to which the kernel streams the coredump contents. There are cases where the kernel will interrupt writing the core out or produce a truncated/not-well-formed core dump without leaving a note. Add logging for the core dump collection failure paths to be able to reason what has gone wrong when the core dump is malformed or missing. Report the size of the data written to aid in diagnosing the user mode helper. 
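For illustration, a sketch of how a dump path could use the logging helpers introduced above when a write fails (the call site and the variables are assumed; only the coredump_report*() macros and dump_user_range() come from these hunks):

    /* cprm, start and len are assumed locals of the dumping code */
    if (!dump_user_range(cprm, start, len)) {
            coredump_report_failure("failed dumping %lu bytes at %#lx",
                                    len, start);
            return 0;
    }
    coredump_report("core dump written, %lld bytes", cprm->written);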
Signed-off-by: Roman Kisel Link: https://lore.kernel.org/r/20240718182743.1959160-3-romank@linux.microsoft.com Signed-off-by: Kees Cook --- include/linux/coredump.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 45e598fe3476..edeb8532ce0f 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -42,7 +42,7 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -extern void do_coredump(const kernel_siginfo_t *siginfo); +extern int do_coredump(const kernel_siginfo_t *siginfo); /* * Logging for the coredump code, ratelimited. @@ -62,7 +62,11 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); #define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) #else -static inline void do_coredump(const kernel_siginfo_t *siginfo) {} +static inline int do_coredump(const kernel_siginfo_t *siginfo) +{ + /* Coredump support is not available, can't fail. */ + return 0; +} #define coredump_report(...) #define coredump_report_failure(...) -- cgit v1.2.3 From 0079c9d1e58a39148e6ce13bda55307ea6aa3a9e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Aug 2024 09:00:23 +0200 Subject: ALSA: ump: Handle MIDI 1.0 Function Block in MIDI 2.0 protocol The UMP v1.1 spec says in the section 6.2.1: "If a UMP Endpoint declares MIDI 2.0 Protocol but a Function Block represents a MIDI 1.0 connection, then may optionally be used for messages to/from that Function Block." It implies that the driver can (and should) keep MIDI 1.0 CVM exceptionally for those FBs even if UMP Endpoint is running in MIDI 2.0 protocol, and the current driver lacks of it. This patch extends the sequencer port info to indicate a MIDI 1.0 port, and tries to send/receive MIDI 1.0 CVM as is when this port is the source or sink. The sequencer port flag is set by the driver at parsing FBs and GTBs although application can set it to its own user-space clients, too. 
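For illustration, a minimal user-space sketch of a client opting its own sequencer port into MIDI 1.0 handling with the flag added below (the surrounding client and port setup is assumed):

    struct snd_seq_port_info pinfo = {};

    /* client number, port name and capabilities filled in as usual */
    pinfo.flags |= SNDRV_SEQ_PORT_FLG_IS_MIDI1;     /* keep MIDI 1.0 CVM */
    ioctl(seq_fd, SNDRV_SEQ_IOCTL_CREATE_PORT, &pinfo);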
Link: https://patch.msgid.link/20240806070024.14301-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump.h | 1 + include/uapi/sound/asequencer.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/ump.h b/include/sound/ump.h index 7f68056acdff..7484f62fb234 100644 --- a/include/sound/ump.h +++ b/include/sound/ump.h @@ -18,6 +18,7 @@ struct snd_ump_group { unsigned int dir_bits; /* directions */ bool active; /* activeness */ bool valid; /* valid group (referred by blocks) */ + bool is_midi1; /* belongs to a MIDI1 FB */ char name[64]; /* group name */ }; diff --git a/include/uapi/sound/asequencer.h b/include/uapi/sound/asequencer.h index 39b37edcf813..bc30c1f2a109 100644 --- a/include/uapi/sound/asequencer.h +++ b/include/uapi/sound/asequencer.h @@ -461,6 +461,8 @@ struct snd_seq_remove_events { #define SNDRV_SEQ_PORT_FLG_TIMESTAMP (1<<1) #define SNDRV_SEQ_PORT_FLG_TIME_REAL (1<<2) +#define SNDRV_SEQ_PORT_FLG_IS_MIDI1 (1<<3) /* Keep MIDI 1.0 protocol */ + /* port direction */ #define SNDRV_SEQ_PORT_DIR_UNKNOWN 0 #define SNDRV_SEQ_PORT_DIR_INPUT 1 -- cgit v1.2.3 From 7424fc6b86c8980a87169e005f5cd4438d18efe6 Mon Sep 17 00:00:00 2001 From: Gatlin Newhouse Date: Wed, 24 Jul 2024 00:01:55 +0000 Subject: x86/traps: Enable UBSAN traps on x86 Currently ARM64 extracts which specific sanitizer has caused a trap via encoded data in the trap instruction. Clang on x86 currently encodes the same data in the UD1 instruction but x86 handle_bug() and is_valid_bugaddr() currently only look at UD2. Bring x86 to parity with ARM64, similar to commit 25b84002afb9 ("arm64: Support Clang UBSAN trap codes for better reporting"). See the llvm links for information about the code generation. Enable the reporting of UBSAN sanitizer details on x86 compiled with clang when CONFIG_UBSAN_TRAP=y by analysing UD1 and retrieving the type immediate which is encoded by the compiler after the UD1. [ tglx: Simplified it by moving the printk() into handle_bug() ] Signed-off-by: Gatlin Newhouse Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Kees Cook Link: https://lore.kernel.org/all/20240724000206.451425-1-gatlin.newhouse@gmail.com Link: https://github.com/llvm/llvm-project/commit/c5978f42ec8e9#diff-bb68d7cd885f41cfc35843998b0f9f534adb60b415f647109e597ce448e92d9f Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/X86/X86InstrSystem.td#L27 --- include/linux/ubsan.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h index bff7445498de..d8219cbe09ff 100644 --- a/include/linux/ubsan.h +++ b/include/linux/ubsan.h @@ -4,6 +4,11 @@ #ifdef CONFIG_UBSAN_TRAP const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type); +#else +static inline const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) +{ + return NULL; +} #endif #endif -- cgit v1.2.3 From 7d2fb3812acde0a76e0d361877e8295db065f9f4 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 6 Aug 2024 00:00:57 +0000 Subject: ASoC: remove bespoke trigger support Bespoke trigger support was added when Linux v3.5 by this patch. commit 07bf84aaf736781a283b1bd36eaa911453b14574 ("ASoC: dpcm: Add bespoke trigger()") test-component driver is using it, but this is because it indicates used function for debug/trace purpose. Except test-component driver, bespoke trigger has never been used over 10 years in upstream. 
We can re-support it if needed in the future, but let's remove it for now, because it just noise in upstream. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87v80ewmdi.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 5 +---- include/sound/soc-dpcm.h | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index bbb72ad4c951..ab4e109fe71d 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -240,8 +240,6 @@ int snd_soc_pcm_dai_new(struct snd_soc_pcm_runtime *rtd); int snd_soc_pcm_dai_prepare(struct snd_pcm_substream *substream); int snd_soc_pcm_dai_trigger(struct snd_pcm_substream *substream, int cmd, int rollback); -int snd_soc_pcm_dai_bespoke_trigger(struct snd_pcm_substream *substream, - int cmd); void snd_soc_pcm_dai_delay(struct snd_pcm_substream *substream, snd_pcm_sframes_t *cpu_delay, snd_pcm_sframes_t *codec_delay); @@ -345,8 +343,7 @@ struct snd_soc_dai_ops { */ int (*trigger)(struct snd_pcm_substream *, int, struct snd_soc_dai *); - int (*bespoke_trigger)(struct snd_pcm_substream *, int, - struct snd_soc_dai *); + /* * For hardware based FIFO caused delay reporting. * Optional. diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index ebd24753dd00..773f2db8c31c 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -58,7 +58,6 @@ enum snd_soc_dpcm_state { enum snd_soc_dpcm_trigger { SND_SOC_DPCM_TRIGGER_PRE = 0, SND_SOC_DPCM_TRIGGER_POST, - SND_SOC_DPCM_TRIGGER_BESPOKE, }; /* -- cgit v1.2.3 From db65eb46de135338d6177f8853e0fd208f19d63e Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 2 Aug 2024 11:13:19 +0530 Subject: drm/buddy: Add start address support to trim function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a new start parameter in trim function to specify exact address from where to start the trimming. This would help us in situations like if drivers would like to do address alignment for specific requirements. - Add a new flag DRM_BUDDY_TRIM_DISABLE. Drivers can use this flag to disable the allocator trimming part. This patch enables the drivers control trimming and they can do it themselves based on the application requirements. 
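For illustration, a hedged driver-side sketch of the extended trim call (the allocation, alignment policy and variables below are assumed; only the new prototype comes from this patch):

    LIST_HEAD(blocks);
    u64 new_start, new_size;    /* new_size: assumed target size */
    int err;

    /* blocks previously allocated with drm_buddy_alloc_blocks() */
    new_start = round_up(start, alignment);    /* driver-specific alignment */
    err = drm_buddy_block_trim(&mm, &new_start, new_size, &blocks);
    if (err)
            return err;    /* or keep the untrimmed allocation */

Passing DRM_BUDDY_TRIM_DISABLE at allocation time skips the allocator's own trimming so that the driver can perform a trim like the above itself.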
v1:(Matthew) - check new_start alignment with min chunk_size - use range_overflows() Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- include/drm/drm_buddy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce5..9689a7c5dd36 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); -- cgit v1.2.3 From 90c36325c796cc7111329607a649c34f4979c78e Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:06 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_ABORT_ON_CLOSE for active reset Introducing a new type TCP_ABORT_ON_CLOSE for tcp reset reason to handle the case where more data is unread in closing phase. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 2575c85d7f7a..fa6bfd0d7d69 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -17,6 +17,7 @@ FN(TCP_ABORT_ON_DATA) \ FN(TCP_TIMEWAIT_SOCKET) \ FN(INVALID_SYN) \ + FN(TCP_ABORT_ON_CLOSE) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -84,6 +85,11 @@ enum sk_rst_reason { * an error, send a reset" */ SK_RST_REASON_INVALID_SYN, + /** + * @SK_RST_REASON_TCP_ABORT_ON_CLOSE: abort on close + * corresponding to LINUX_MIB_TCPABORTONCLOSE + */ + SK_RST_REASON_TCP_ABORT_ON_CLOSE, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From edc92b48abc5b21c98eca5d05b98a560d7df2e4d Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:07 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_ABORT_ON_LINGER for active reset Introducing a new type TCP_ABORT_ON_LINGER for tcp reset reason to handle negative linger value case. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index fa6bfd0d7d69..fbbaeb969e6a 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -18,6 +18,7 @@ FN(TCP_TIMEWAIT_SOCKET) \ FN(INVALID_SYN) \ FN(TCP_ABORT_ON_CLOSE) \ + FN(TCP_ABORT_ON_LINGER) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -90,6 +91,11 @@ enum sk_rst_reason { * corresponding to LINUX_MIB_TCPABORTONCLOSE */ SK_RST_REASON_TCP_ABORT_ON_CLOSE, + /** + * @SK_RST_REASON_TCP_ABORT_ON_LINGER: abort on linger + * corresponding to LINUX_MIB_TCPABORTONLINGER + */ + SK_RST_REASON_TCP_ABORT_ON_LINGER, /* Copy from include/uapi/linux/mptcp.h. 
* These reset fields will not be changed since they adhere to -- cgit v1.2.3 From 8407994f0c3594eb3854e3799af86224f4a8e6e6 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:08 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_ABORT_ON_MEMORY for active reset Introducing a new type TCP_ABORT_ON_MEMORY for tcp reset reason to handle out of memory case. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index fbbaeb969e6a..eef658da8952 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -19,6 +19,7 @@ FN(INVALID_SYN) \ FN(TCP_ABORT_ON_CLOSE) \ FN(TCP_ABORT_ON_LINGER) \ + FN(TCP_ABORT_ON_MEMORY) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -96,6 +97,11 @@ enum sk_rst_reason { * corresponding to LINUX_MIB_TCPABORTONLINGER */ SK_RST_REASON_TCP_ABORT_ON_LINGER, + /** + * @SK_RST_REASON_TCP_ABORT_ON_MEMORY: abort on memory + * corresponding to LINUX_MIB_TCPABORTONMEMORY + */ + SK_RST_REASON_TCP_ABORT_ON_MEMORY, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From edefba66d929eb2d023df93a0a8175a4ffe82684 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:09 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_STATE for active reset Introducing a new type TCP_STATE to handle some reset conditions appearing in RFC 793 due to its socket state. Actually, we can look into RFC 9293 which has no discrepancy about this part. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index eef658da8952..bbf20d0bbde7 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -20,6 +20,7 @@ FN(TCP_ABORT_ON_CLOSE) \ FN(TCP_ABORT_ON_LINGER) \ FN(TCP_ABORT_ON_MEMORY) \ + FN(TCP_STATE) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -102,6 +103,11 @@ enum sk_rst_reason { * corresponding to LINUX_MIB_TCPABORTONMEMORY */ SK_RST_REASON_TCP_ABORT_ON_MEMORY, + /** + * @SK_RST_REASON_TCP_STATE: abort on tcp state + * Please see RFC 9293 for all possible reset conditions + */ + SK_RST_REASON_TCP_STATE, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From 0a399892a596055c8f069a17b4775fe5ab66d32a Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:10 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT for active reset Introducing this to show the users the reason of keepalive timeout. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/rstreason.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index bbf20d0bbde7..9c0c46df0e73 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -21,6 +21,7 @@ FN(TCP_ABORT_ON_LINGER) \ FN(TCP_ABORT_ON_MEMORY) \ FN(TCP_STATE) \ + FN(TCP_KEEPALIVE_TIMEOUT) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -108,6 +109,12 @@ enum sk_rst_reason { * Please see RFC 9293 for all possible reset conditions */ SK_RST_REASON_TCP_STATE, + /** + * @SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT: time to timeout + * When we have already run out of all the chances, which means + * keepalive timeout, we have to reset the connection + */ + SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From c026c6562f86b24dd2dfef501fb1e64cc3884a79 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:11 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_DISCONNECT_WITH_DATA for active reset When user tries to disconnect a socket and there are more data written into tcp write queue, we should tell users about this reset reason. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 9c0c46df0e73..69cb2e52b7da 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -22,6 +22,7 @@ FN(TCP_ABORT_ON_MEMORY) \ FN(TCP_STATE) \ FN(TCP_KEEPALIVE_TIMEOUT) \ + FN(TCP_DISCONNECT_WITH_DATA) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -115,6 +116,13 @@ enum sk_rst_reason { * keepalive timeout, we have to reset the connection */ SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT, + /** + * @SK_RST_REASON_TCP_DISCONNECT_WITH_DATA: disconnect when write + * queue is not empty + * It means user has written data into the write queue when doing + * disconnecting, so we have to send an RST. + */ + SK_RST_REASON_TCP_DISCONNECT_WITH_DATA, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From ebaa86c0bddd2c47c516bf2096b17c0bed71d914 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 11:22:59 +0200 Subject: ALSA: usb-audio: Update UMP group attributes for GTB blocks, too When a FB is created from a GTB instead of UMP FB Info inquiry, we missed the update of the corresponding UMP Group attributes. Export the call of updater and let it be called from the USB driver. 
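For illustration, the assumed call site on the USB driver side once Function Blocks have been created from GTBs (sketch only):

    /* ump is the driver's struct snd_ump_endpoint *, FB list already filled */
    snd_ump_update_group_attrs(ump);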
Fixes: 0642a3c5cacc ("ALSA: ump: Update substream name from assigned FB names") Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807092303.1935-5-tiwai@suse.de --- include/sound/ump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/ump.h b/include/sound/ump.h index 7484f62fb234..532c2c3ea28e 100644 --- a/include/sound/ump.h +++ b/include/sound/ump.h @@ -123,6 +123,7 @@ static inline int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump, int snd_ump_receive_ump_val(struct snd_ump_endpoint *ump, u32 val); int snd_ump_switch_protocol(struct snd_ump_endpoint *ump, unsigned int protocol); +void snd_ump_update_group_attrs(struct snd_ump_endpoint *ump); /* * Some definitions for UMP -- cgit v1.2.3 From 97edbc02b2efdb0cd0f507b6a45fc509acc861bb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Jul 2024 14:51:11 -0400 Subject: buffer: Convert block_write_end() to take a folio All callers now have a folio, so pass it in instead of converting from a folio to a page and back to a folio again. Saves a call to compound_head(). Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 14acf1bbe0ce..3a3fec154536 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -262,8 +262,8 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page *, void *); + loff_t, unsigned len, unsigned copied, + struct folio *, void *); int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); -- cgit v1.2.3 From a225800f322a3d6cc8b8b6c7dc4d5281f2f5375b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Jul 2024 15:45:32 -0400 Subject: fs: Convert aops->write_end to take a folio Most callers have a folio, and most implementations operate on a folio, so remove the conversion from folio->page->folio to fit through this interface. 
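For illustration, what a block-based filesystem's ->write_end() looks like after this conversion (hypothetical "foo" filesystem; the real converted callers live outside these hunks):

    static int foo_write_end(struct file *file, struct address_space *mapping,
                             loff_t pos, unsigned len, unsigned copied,
                             struct folio *folio, void *fsdata)
    {
            /* generic_write_end() now takes the folio directly as well */
            return generic_write_end(file, mapping, pos, len, copied,
                                     folio, fsdata);
    }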
Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 3a3fec154536..165e859664a5 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -265,8 +265,8 @@ int block_write_end(struct file *, struct address_space *, loff_t, unsigned len, unsigned copied, struct folio *, void *); int generic_write_end(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page *, void *); + loff_t, unsigned len, unsigned copied, + struct folio *, void *); void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, struct page **, void **, diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..1af3373c7cd2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -411,7 +411,7 @@ struct address_space_operations { struct page **pagep, void **fsdata); int (*write_end)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); + struct folio *folio, void *fsdata); /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); -- cgit v1.2.3 From 1da86618bdce301d23e89ecce92161f9d3b3c5e7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 15 Jul 2024 14:24:01 -0400 Subject: fs: Convert aops->write_begin to take a folio Convert all callers from working on a page to working on one page of a folio (support for working on an entire folio can come later). Removes a lot of folio->page->folio conversions. 
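Complementing the ->write_end() sketch above, a hypothetical ->write_begin() after this conversion ("foo" and foo_get_block are assumed):

    static int foo_write_begin(struct file *file, struct address_space *mapping,
                               loff_t pos, unsigned len,
                               struct folio **foliop, void **fsdata)
    {
            return block_write_begin(mapping, pos, len, foliop,
                                     foo_get_block);
    }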
Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 165e859664a5..254563a2a9de 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -258,7 +258,7 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, int block_read_full_folio(struct folio *, get_block_t *); bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, get_block_t *get_block); + struct folio **foliop, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, @@ -269,7 +269,7 @@ int generic_write_end(struct file *, struct address_space *, struct folio *, void *); void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); int cont_write_begin(struct file *, struct address_space *, loff_t, - unsigned, struct page **, void **, + unsigned, struct folio **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); void block_commit_write(struct page *page, unsigned int from, unsigned int to); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1af3373c7cd2..7a79b88a8f5a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -408,7 +408,7 @@ struct address_space_operations { int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata); + struct folio **foliop, void **fsdata); int (*write_end)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata); @@ -3331,7 +3331,7 @@ extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata); + struct folio **foliop, void **fsdata); extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); -- cgit v1.2.3 From 9f04609f74ec7a439e1ac42da5db9e6ddf4f7b13 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Jul 2024 23:09:04 -0400 Subject: buffer: Convert __block_write_begin() to take a folio Almost all callers have a folio now, so change __block_write_begin() to take a folio and remove a call to compound_head(). 
Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 254563a2a9de..c85e65fd4cdc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -259,7 +259,7 @@ int block_read_full_folio(struct folio *, get_block_t *); bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, get_block_t *get_block); -int __block_write_begin(struct page *page, loff_t pos, unsigned len, +int __block_write_begin(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, loff_t, unsigned len, unsigned copied, -- cgit v1.2.3 From 07a83512fa5be3f3d46369eee6352102fd9fb505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:08 +0100 Subject: usb: typec: tcpci: fix a comment typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit autonously -> autonomously Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-1-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 0ab39b6ea205..d27fe0c22a8b 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -190,7 +190,7 @@ struct tcpci; * Optional; Callback to perform chip specific operations when FRS * is sourcing vbus. * @auto_discharge_disconnect: - * Optional; Enables TCPC to autonously discharge vbus on disconnect. + * Optional; Enables TCPC to autonomously discharge vbus on disconnect. * @vbus_vsafe0v: * optional; Set when TCPC can detect whether vbus is at VSAFE0V. * @set_partner_usb_comm_capable: -- cgit v1.2.3 From f523aa6d72598bcd2946c7e02d29f33f94806a15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:10 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_CC_STATUS_CC[12] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing code here, particularly in maxim_contaminant.c, is arguably quite hard to read due to the open-coded masking and shifting spanning multiple lines. Use GENMASK() and FIELD_GET() instead, which arguably make the code much easier to follow. While at it, use the symbolic name TCPC_CC_STATE_SRC_OPEN for one instance of open-coded 0x0. 
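For readers less used to the bitfield helpers, the throwaway userspace snippet below mimics what GENMASK()/FIELD_GET() from <linux/bits.h> and <linux/bitfield.h> do with the new definitions; the two macros here are simplified stand-ins, not the kernel implementations:

	#include <stdio.h>

	#define GENMASK(h, l)		(((1u << ((h) - (l) + 1)) - 1) << (l))
	#define FIELD_GET(mask, reg)	(((reg) & (mask)) >> __builtin_ctz(mask))

	#define TCPC_CC_STATUS_CC2	GENMASK(3, 2)
	#define TCPC_CC_STATUS_CC1	GENMASK(1, 0)

	int main(void)
	{
		unsigned int cc_status = 0x09;	/* CC2 = 0b10, CC1 = 0b01 */

		printf("cc1=%u cc2=%u\n",
		       FIELD_GET(TCPC_CC_STATUS_CC1, cc_status),
		       FIELD_GET(TCPC_CC_STATUS_CC2, cc_status));
		return 0;
	}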
Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-3-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index d27fe0c22a8b..31d21ccf662b 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -92,11 +92,9 @@ #define TCPC_CC_STATUS_TERM BIT(4) #define TCPC_CC_STATUS_TERM_RP 0 #define TCPC_CC_STATUS_TERM_RD 1 +#define TCPC_CC_STATUS_CC2 GENMASK(3, 2) +#define TCPC_CC_STATUS_CC1 GENMASK(1, 0) #define TCPC_CC_STATE_SRC_OPEN 0 -#define TCPC_CC_STATUS_CC2_SHIFT 2 -#define TCPC_CC_STATUS_CC2_MASK 0x3 -#define TCPC_CC_STATUS_CC1_SHIFT 0 -#define TCPC_CC_STATUS_CC1_MASK 0x3 #define TCPC_POWER_STATUS 0x1e #define TCPC_POWER_STATUS_DBG_ACC_CON BIT(7) @@ -256,7 +254,7 @@ static inline enum typec_cc_status tcpci_to_typec_cc(unsigned int cc, bool sink) if (sink) return TYPEC_CC_RP_3_0; fallthrough; - case 0x0: + case TCPC_CC_STATE_SRC_OPEN: default: return TYPEC_CC_OPEN; } -- cgit v1.2.3 From 83b254c13ac093810e1058d9cb1bbb4b7ef9c246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:11 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_ROLE_CTRL_CC[12] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All this open-coded shifting and masking is quite hard to read, in particular the if-statement in tcpci_apply_rc(). Declare TCPC_ROLE_CTRL_CC[12] using GENMASK() which allows using FIELD_GET() and FIELD_PREP() to arguably make the code more legible. Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-4-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 31d21ccf662b..552d074429f0 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -68,10 +68,8 @@ #define TCPC_ROLE_CTRL_RP_VAL_DEF 0x0 #define TCPC_ROLE_CTRL_RP_VAL_1_5 0x1 #define TCPC_ROLE_CTRL_RP_VAL_3_0 0x2 -#define TCPC_ROLE_CTRL_CC2_SHIFT 2 -#define TCPC_ROLE_CTRL_CC2_MASK 0x3 -#define TCPC_ROLE_CTRL_CC1_SHIFT 0 -#define TCPC_ROLE_CTRL_CC1_MASK 0x3 +#define TCPC_ROLE_CTRL_CC2 GENMASK(3, 2) +#define TCPC_ROLE_CTRL_CC1 GENMASK(1, 0) #define TCPC_ROLE_CTRL_CC_RA 0x0 #define TCPC_ROLE_CTRL_CC_RP 0x1 #define TCPC_ROLE_CTRL_CC_RD 0x2 @@ -176,8 +174,7 @@ #define tcpc_presenting_rd(reg, cc) \ (!(TCPC_ROLE_CTRL_DRP & (reg)) && \ - (((reg) & (TCPC_ROLE_CTRL_## cc ##_MASK << TCPC_ROLE_CTRL_## cc ##_SHIFT)) == \ - (TCPC_ROLE_CTRL_CC_RD << TCPC_ROLE_CTRL_## cc ##_SHIFT))) + FIELD_GET(TCPC_ROLE_CTRL_## cc, reg) == TCPC_ROLE_CTRL_CC_RD) struct tcpci; -- cgit v1.2.3 From 46b1e0f87ba89f0a06ffbde5fe8d13c5af93f94e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:12 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_ROLE_CTRL_RP_VAL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align the last remaining field TCPC_ROLE_CTRL_RP_VAL with the other fields in the TCPC_ROLE_CTRL register by using GENMASK() and FIELD_PREP(). 
Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-5-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 552d074429f0..80652d4f722e 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -63,8 +63,7 @@ #define TCPC_ROLE_CTRL 0x1a #define TCPC_ROLE_CTRL_DRP BIT(6) -#define TCPC_ROLE_CTRL_RP_VAL_SHIFT 4 -#define TCPC_ROLE_CTRL_RP_VAL_MASK 0x3 +#define TCPC_ROLE_CTRL_RP_VAL GENMASK(5, 4) #define TCPC_ROLE_CTRL_RP_VAL_DEF 0x0 #define TCPC_ROLE_CTRL_RP_VAL_1_5 0x1 #define TCPC_ROLE_CTRL_RP_VAL_3_0 0x2 -- cgit v1.2.3 From aee4568f42e0d7526207a10944bb89bb45522b59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:13 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_MSG_HDR_INFO_REV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert field TCPC_MSG_HDR_INFO_REV from register TCPC_MSG_HDR_INFO to using GENMASK() and FIELD_PREP() so as to keep using a similar approach for all fields. Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-6-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 80652d4f722e..3cd61e9f73b3 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -129,9 +129,8 @@ #define TCPC_MSG_HDR_INFO 0x2e #define TCPC_MSG_HDR_INFO_DATA_ROLE BIT(3) +#define TCPC_MSG_HDR_INFO_REV GENMASK(2, 1) #define TCPC_MSG_HDR_INFO_PWR_ROLE BIT(0) -#define TCPC_MSG_HDR_INFO_REV_SHIFT 1 -#define TCPC_MSG_HDR_INFO_REV_MASK 0x3 #define TCPC_RX_DETECT 0x2f #define TCPC_RX_DETECT_HARD_RESET BIT(5) -- cgit v1.2.3 From 7cd41974d2c81055f61ba9f69e31c02e866716e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:14 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_TRANSMIT register fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert all fields from register TCPC_TRANSMIT to using GENMASK() and FIELD_PREP() so as to keep using a similar approach throughout the code base and make it arguably easier to read. 
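The write direction is symmetric: FIELD_PREP() shifts a value into its mask. A simplified userspace sketch, again with stand-in macros and purely illustrative field values:

	#include <stdio.h>

	#define GENMASK(h, l)		(((1u << ((h) - (l) + 1)) - 1) << (l))
	#define FIELD_PREP(mask, val)	(((val) << __builtin_ctz(mask)) & (mask))

	#define TCPC_TRANSMIT_RETRY	GENMASK(5, 4)
	#define TCPC_TRANSMIT_TYPE	GENMASK(2, 0)

	int main(void)
	{
		unsigned int reg = FIELD_PREP(TCPC_TRANSMIT_RETRY, 3) |
				   FIELD_PREP(TCPC_TRANSMIT_TYPE, 0);

		printf("TCPC_TRANSMIT = 0x%02x\n", reg);	/* prints 0x30 */
		return 0;
	}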
Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-7-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 3cd61e9f73b3..f7f5cfbdef12 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -148,10 +148,8 @@ #define TCPC_RX_DATA 0x34 /* through 0x4f */ #define TCPC_TRANSMIT 0x50 -#define TCPC_TRANSMIT_RETRY_SHIFT 4 -#define TCPC_TRANSMIT_RETRY_MASK 0x3 -#define TCPC_TRANSMIT_TYPE_SHIFT 0 -#define TCPC_TRANSMIT_TYPE_MASK 0x7 +#define TCPC_TRANSMIT_RETRY GENMASK(5, 4) +#define TCPC_TRANSMIT_TYPE GENMASK(2, 0) #define TCPC_TX_BYTE_CNT 0x51 #define TCPC_TX_HDR 0x52 -- cgit v1.2.3 From 08d6fd691bdd5d6e7881ce42ca6774ed92b10896 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Thu, 1 Aug 2024 11:00:03 +0530 Subject: usb: gadget: Increase max configuration interface to 32 Currently, max configuration interfaces are limited to 16, which fails for compositions containing 10 UVC configurations with interrupt ep disabled along with other configurations , and we see bind failures while allocating interface ID in uvc bind. Increase max configuration interface to 32 to support any large compositions limited to same size as usb device endpoints as interfaces cannot be more than endpoints. Signed-off-by: Akash Kumar Link: https://lore.kernel.org/r/20240801053003.15153-1-quic_akakum@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index af3cd2aae4bc..6e38fb9d2117 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -256,7 +256,7 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f, struct usb_ep *_ep); int usb_func_wakeup(struct usb_function *func); -#define MAX_CONFIG_INTERFACES 16 /* arbitrary; max 255 */ +#define MAX_CONFIG_INTERFACES 32 /** * struct usb_configuration - represents one gadget configuration -- cgit v1.2.3 From 259b46204885431f2853afa2a2bffa63f3705e67 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 5 Aug 2024 12:20:37 +0200 Subject: serial: remove quot_frac from serial8250_do_set_divisor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit quot_frac is unused in serial8250_do_set_divisor() since commit b2b4b8ed3c06 (serial: 8250_exar: Move custom divisor support out from 8250_port). So no point to pass it. 
Signed-off-by: Jiri Slaby (SUSE) Cc: Ilpo Järvinen Cc: Andy Shevchenko Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240805102046.307511-5-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index fd59ed2cca53..e0717c8393d7 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -193,7 +193,7 @@ void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate); void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl); void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud, - unsigned int quot, unsigned int quot_frac); + unsigned int quot); int fsl8250_handle_irq(struct uart_port *port); int serial8250_handle_irq(struct uart_port *port, unsigned int iir); u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr); -- cgit v1.2.3 From 3dc73106ffc47640e692b9b32ebfd59d776c07fd Mon Sep 17 00:00:00 2001 From: Varshini Rajendran Date: Mon, 29 Jul 2024 12:38:03 +0530 Subject: dt-bindings: clock: at91: Allow PLLs to be exported and referenced in DT Allow PLLADIV2 and LVDSPLL to be referenced as a PMC_TYPE_CORE clock from phandle in DT for sam9x7 SoC family. Signed-off-by: Varshini Rajendran Acked-by: Rob Herring Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20240729070803.1990916-1-varshini.rajendran@microchip.com Signed-off-by: Claudiu Beznea --- include/dt-bindings/clock/at91.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index 3e3972a814c1..6ede88c3992d 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -38,6 +38,10 @@ #define PMC_CPU (PMC_MAIN + 9) #define PMC_MCK1 (PMC_MAIN + 10) +/* SAM9X7 */ +#define PMC_PLLADIV2 (PMC_MAIN + 11) +#define PMC_LVDSPLL (PMC_MAIN + 12) + #ifndef AT91_PMC_MOSCS #define AT91_PMC_MOSCS 0 /* MOSCS Flag */ #define AT91_PMC_LOCKA 1 /* PLLA Lock */ -- cgit v1.2.3 From 130fd056dd82b02db9a661c013071af35309be1a Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 10 Jun 2024 20:20:16 +0100 Subject: sched/rt: Clean up usage of rt_task() rt_task() checks if a task has RT priority. But depends on your dictionary, this could mean it belongs to RT class, or is a 'realtime' task, which includes RT and DL classes. Since this has caused some confusion already on discussion [1], it seemed a clean up is due. I define the usage of rt_task() to be tasks that belong to RT class. Make sure that it returns true only for RT class and audit the users and replace the ones required the old behavior with the new realtime_task() which returns true for RT and DL classes. Introduce similar realtime_prio() to create similar distinction to rt_prio() and update the users that required the old behavior to use the new function. Move MAX_DL_PRIO to prio.h so it can be used in the new definitions. Document the functions to make it more obvious what is the difference between them. PI-boosted tasks is a factor that must be taken into account when choosing which function to use. Rename task_is_realtime() to realtime_task_policy() as the old name is confusing against the new realtime_task(). No functional changes were intended. 
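The split is easiest to see from the priority ranges themselves. The following throwaway userspace model only mirrors the comparisons introduced here and is not kernel code:

	#include <stdio.h>
	#include <stdbool.h>

	#define MAX_DL_PRIO	0
	#define MAX_RT_PRIO	100

	static bool dl_prio(int prio)       { return prio < MAX_DL_PRIO; }
	static bool rt_prio(int prio)       { return prio >= MAX_DL_PRIO && prio < MAX_RT_PRIO; }
	static bool realtime_prio(int prio) { return prio < MAX_RT_PRIO; }

	int main(void)
	{
		int prio[] = { -1, 0, 99, 120 };	/* DL, highest RT, lowest RT, a CFS level */
		int i;

		for (i = 0; i < 4; i++)
			printf("prio %4d: dl=%d rt=%d realtime=%d\n", prio[i],
			       dl_prio(prio[i]), rt_prio(prio[i]), realtime_prio(prio[i]));
		return 0;
	}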
[1] https://lore.kernel.org/lkml/20240506100509.GL40213@noisy.programming.kicks-ass.net/ Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Phil Auld Reviewed-by: "Steven Rostedt (Google)" Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20240610192018.1567075-2-qyousef@layalina.io --- include/linux/ioprio.h | 2 +- include/linux/sched/deadline.h | 6 ++++-- include/linux/sched/prio.h | 1 + include/linux/sched/rt.h | 27 ++++++++++++++++++++++++++- 4 files changed, 32 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index db1249cd9692..75859b78d540 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (task_is_realtime(task)) + else if (realtime_task_policy(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index df3aca89d4f5..5cb88b748ad6 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -10,8 +10,6 @@ #include -#define MAX_DL_PRIO 0 - static inline int dl_prio(int prio) { if (unlikely(prio < MAX_DL_PRIO)) @@ -19,6 +17,10 @@ static inline int dl_prio(int prio) return 0; } +/* + * Returns true if a task has a priority that belongs to DL class. PI-boosted + * tasks will return true. Use dl_policy() to ignore PI-boosted tasks. + */ static inline int dl_task(struct task_struct *p) { return dl_prio(p->prio); diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index ab83d85e1183..6ab43b4f72f9 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -14,6 +14,7 @@ */ #define MAX_RT_PRIO 100 +#define MAX_DL_PRIO 0 #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index b2b9e6eb9683..a055dd68a77c 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -7,18 +7,43 @@ struct task_struct; static inline int rt_prio(int prio) +{ + if (unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO)) + return 1; + return 0; +} + +static inline int realtime_prio(int prio) { if (unlikely(prio < MAX_RT_PRIO)) return 1; return 0; } +/* + * Returns true if a task has a priority that belongs to RT class. PI-boosted + * tasks will return true. Use rt_policy() to ignore PI-boosted tasks. + */ static inline int rt_task(struct task_struct *p) { return rt_prio(p->prio); } -static inline bool task_is_realtime(struct task_struct *tsk) +/* + * Returns true if a task has a priority that belongs to RT or DL classes. + * PI-boosted tasks will return true. Use realtime_task_policy() to ignore + * PI-boosted tasks. + */ +static inline int realtime_task(struct task_struct *p) +{ + return realtime_prio(p->prio); +} + +/* + * Returns true if a task has a policy that belongs to RT or DL classes. + * PI-boosted tasks will return false. + */ +static inline bool realtime_task_policy(struct task_struct *tsk) { int policy = tsk->policy; -- cgit v1.2.3 From b166af3db70fdcecf125662a2360471bb20be203 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 10 Jun 2024 20:20:17 +0100 Subject: sched/rt, dl: Convert functions to return bool {rt, realtime, dl}_{task, prio}() functions' return value is actually a bool. Convert their return type to reflect that. 
Suggested-by: "Steven Rostedt (Google)" Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sebastian Andrzej Siewior Reviewed-by: "Steven Rostedt (Google)" Reviewed-by: Metin Kaya Link: https://lore.kernel.org/r/20240610192018.1567075-3-qyousef@layalina.io --- include/linux/sched/deadline.h | 8 +++----- include/linux/sched/rt.h | 16 ++++++---------- 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 5cb88b748ad6..3a912ab42bb5 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -10,18 +10,16 @@ #include -static inline int dl_prio(int prio) +static inline bool dl_prio(int prio) { - if (unlikely(prio < MAX_DL_PRIO)) - return 1; - return 0; + return unlikely(prio < MAX_DL_PRIO); } /* * Returns true if a task has a priority that belongs to DL class. PI-boosted * tasks will return true. Use dl_policy() to ignore PI-boosted tasks. */ -static inline int dl_task(struct task_struct *p) +static inline bool dl_task(struct task_struct *p) { return dl_prio(p->prio); } diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index a055dd68a77c..91ef1ef2019f 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -6,25 +6,21 @@ struct task_struct; -static inline int rt_prio(int prio) +static inline bool rt_prio(int prio) { - if (unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO)) - return 1; - return 0; + return unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO); } -static inline int realtime_prio(int prio) +static inline bool realtime_prio(int prio) { - if (unlikely(prio < MAX_RT_PRIO)) - return 1; - return 0; + return unlikely(prio < MAX_RT_PRIO); } /* * Returns true if a task has a priority that belongs to RT class. PI-boosted * tasks will return true. Use rt_policy() to ignore PI-boosted tasks. */ -static inline int rt_task(struct task_struct *p) +static inline bool rt_task(struct task_struct *p) { return rt_prio(p->prio); } @@ -34,7 +30,7 @@ static inline int rt_task(struct task_struct *p) * PI-boosted tasks will return true. Use realtime_task_policy() to ignore * PI-boosted tasks. */ -static inline int realtime_task(struct task_struct *p) +static inline bool realtime_task(struct task_struct *p) { return realtime_prio(p->prio); } -- cgit v1.2.3 From ae04f69de0bef93c7086cf2983dbc8e8fd624ebe Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 10 Jun 2024 20:20:18 +0100 Subject: sched/rt: Rename realtime_{prio, task}() to rt_or_dl_{prio, task}() Some find the name realtime overloaded. Use rt_or_dl() as an alternative, hopefully better, name. 
Suggested-by: Daniel Bristot de Oliveira Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240610192018.1567075-4-qyousef@layalina.io --- include/linux/ioprio.h | 2 +- include/linux/sched/rt.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 75859b78d540..b25377b6ea98 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (realtime_task_policy(task)) + else if (rt_or_dl_task_policy(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 91ef1ef2019f..4e3338103654 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -11,7 +11,7 @@ static inline bool rt_prio(int prio) return unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO); } -static inline bool realtime_prio(int prio) +static inline bool rt_or_dl_prio(int prio) { return unlikely(prio < MAX_RT_PRIO); } @@ -27,19 +27,19 @@ static inline bool rt_task(struct task_struct *p) /* * Returns true if a task has a priority that belongs to RT or DL classes. - * PI-boosted tasks will return true. Use realtime_task_policy() to ignore + * PI-boosted tasks will return true. Use rt_or_dl_task_policy() to ignore * PI-boosted tasks. */ -static inline bool realtime_task(struct task_struct *p) +static inline bool rt_or_dl_task(struct task_struct *p) { - return realtime_prio(p->prio); + return rt_or_dl_prio(p->prio); } /* * Returns true if a task has a policy that belongs to RT or DL classes. * PI-boosted tasks will return false. */ -static inline bool realtime_task_policy(struct task_struct *tsk) +static inline bool rt_or_dl_task_policy(struct task_struct *tsk) { int policy = tsk->policy; -- cgit v1.2.3 From 6e2fdceffdc6bd7b8ba314a1d1b976721533c8f9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 26 Jul 2024 14:42:08 -0400 Subject: tracing: Use refcount for trace_event_file reference counter Instead of using an atomic counter for the trace_event_file reference counter, use the refcount interface. It has various checks to make sure the reference counting is correct, and will warn if it detects an error (like refcount_inc() on '0'). Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20240726144208.687cce24@rorschach.local.home Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df3e2973626..fed58e54f15e 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -680,7 +680,7 @@ struct trace_event_file { * caching and such. 
Which is mostly OK ;-) */ unsigned long flags; - atomic_t ref; /* ref count for opened files */ + refcount_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; -- cgit v1.2.3 From d507ae0dc83b7f43cdf6760b8f1a30aac4fc405a Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 2 Aug 2024 11:13:19 +0530 Subject: drm/buddy: Add start address support to trim function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a new start parameter in trim function to specify exact address from where to start the trimming. This would help us in situations like if drivers would like to do address alignment for specific requirements. - Add a new flag DRM_BUDDY_TRIM_DISABLE. Drivers can use this flag to disable the allocator trimming part. This patch enables the drivers control trimming and they can do it themselves based on the application requirements. v1:(Matthew) - check new_start alignment with min chunk_size - use range_overflows() Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit db65eb46de135338d6177f8853e0fd208f19d63e) --- include/drm/drm_buddy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce5..9689a7c5dd36 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); -- cgit v1.2.3 From c2c0b67dca3cb3b3cea0dd60075a1c5ba77e2fcd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Aug 2024 17:02:32 +0200 Subject: ASoC: tas2781-i2c: Drop weird GPIO code The tas2781-i2c driver gets an IRQ from either ACPI or device tree, then proceeds to check if the IRQ has a corresponding GPIO and in case it does enforce the GPIO as input and set a label on it. This is abuse of the API: - First we cannot guarantee that the numberspaces of the GPIOs and the IRQs are the same, i.e that an IRQ number corresponds to a GPIO number like that. - Second, GPIO chips and IRQ chips should be treated as orthogonal APIs, the irqchip needs to ascertain that the backing GPIO line is set to input etc just using the irqchip. - Third it is using the legacy API which should not be used in new code yet this was added just a year ago. Delete the offending code. If this creates problems the GPIO and irqchip maintainers can help to fix the issues. It *should* not create any problems, because the irq isn't used anywhere in the driver, it's just obtained and then left unused. 
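Should a consumer for the interrupt appear later, the conventional pattern would be roughly the sketch below; the handler, label and flags are assumptions for illustration, not code proposed here:

	static int example_i2c_probe(struct i2c_client *client)
	{
		int irq = client->irq;	/* mapped from ACPI/DT by the I2C core */
		int ret;

		if (irq > 0) {
			ret = devm_request_threaded_irq(&client->dev, irq,
							NULL, example_irq_thread,
							IRQF_ONESHOT, "tas2781",
							NULL);
			if (ret)
				return ret;
		}
		return 0;
	}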
Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20240807-asoc-tas-gpios-v2-1-bd0f2705d58b@linaro.org Signed-off-by: Mark Brown --- include/sound/tas2781.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 18161d02a96f..dbda552398b5 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -81,11 +81,6 @@ struct tasdevice { bool is_loaderr; }; -struct tasdevice_irqinfo { - int irq_gpio; - int irq; -}; - struct calidata { unsigned char *data; unsigned long total_sz; @@ -93,7 +88,6 @@ struct calidata { struct tasdevice_priv { struct tasdevice tasdevice[TASDEVICE_MAX_CHANNELS]; - struct tasdevice_irqinfo irq_info; struct tasdevice_rca rcabin; struct calidata cali_data; struct tasdevice_fw *fmw; @@ -115,6 +109,7 @@ struct tasdevice_priv { unsigned int chip_id; unsigned int sysclk; + int irq; int cur_prog; int cur_conf; int fw_state; -- cgit v1.2.3 From 58f7e4d7ba32758b861807e77535853cacc1f426 Mon Sep 17 00:00:00 2001 From: Jianhui Zhou <912460177@qq.com> Date: Mon, 5 Aug 2024 19:36:31 +0800 Subject: ring-buffer: Remove unused function ring_buffer_nr_pages() Because ring_buffer_nr_pages() is not an inline function and user accesses buffer->buffers[cpu]->nr_pages directly, the function ring_buffer_nr_pages is removed. Signed-off-by: Jianhui Zhou <912460177@qq.com> Link: https://lore.kernel.org/tencent_F4A7E9AB337F44E0F4B858D07D19EF460708@qq.com Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 96d2140b471e..fd35d4ec12e1 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -193,7 +193,6 @@ void ring_buffer_set_clock(struct trace_buffer *buffer, void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs); bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); struct buffer_data_read_page; -- cgit v1.2.3 From b426b3ba9f6fe17990893c5324727dd217d420b6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:00 +0200 Subject: ALSA: vx_core: Drop unused dev field The vx_core.dev field has never been set but referred incorrectly at firmware loading. Pass the proper device pointer from card->dev at request_firmware(), and drop the unused dev field from vx_core, too. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-11-tiwai@suse.de --- include/sound/vx_core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/vx_core.h b/include/sound/vx_core.h index 1ddd3036bdfc..ca87fa6a8135 100644 --- a/include/sound/vx_core.h +++ b/include/sound/vx_core.h @@ -155,7 +155,6 @@ struct vx_core { unsigned int chip_status; unsigned int pcm_running; - struct device *dev; struct snd_hwdep *hwdep; struct vx_rmh irq_rmh; /* RMH used in interrupts */ -- cgit v1.2.3 From 7f7eff209ee283eec9ae56e9c1446dd1ac3e1022 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:14 +0200 Subject: ALSA: es1688: Use standard print API Use the standard print API with dev_*() instead of the old house-baked one. It gives better information and allows dynamically control of debug prints. 
For referring to the device, introduce snd_card pointer to struct snd_es1688. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-25-tiwai@suse.de --- include/sound/es1688.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/es1688.h b/include/sound/es1688.h index 099569c31fbb..425a3717d77a 100644 --- a/include/sound/es1688.h +++ b/include/sound/es1688.h @@ -17,6 +17,7 @@ #define ES1688_HW_UNDEF 0x0003 struct snd_es1688 { + struct snd_card *card; unsigned long port; /* port of ESS chip */ struct resource *res_port; unsigned long mpu_port; /* MPU-401 port of ESS chip */ -- cgit v1.2.3 From 40b15de3c4f23994f53f930e94939c7b6a0cc52f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:19 +0200 Subject: ALSA: opti9xx: Use standard print API Use the standard print API with dev_*() instead of the old house-baked one. It gives better information and allows dynamically control of debug prints. The card pointer is stored in struct snd_opti9xx and snd_miro to be referred for dev_*() calls. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-30-tiwai@suse.de --- include/sound/aci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/aci.h b/include/sound/aci.h index 6ebbd4223f12..36a761c9820d 100644 --- a/include/sound/aci.h +++ b/include/sound/aci.h @@ -72,6 +72,7 @@ #define ACI_SET_EQ7 0x46 /* ... to Treble */ struct snd_miro_aci { + struct snd_card *card; unsigned long aci_port; int aci_vendor; int aci_product; -- cgit v1.2.3 From 8b4ac5429938dd5f1fbf2eea0687f08cbcccb6be Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:25 +0200 Subject: ALSA: wavefront: Use standard print API Use the standard print API with dev_*() instead of the old house-baked one. It gives better information and allows dynamically control of debug prints. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-36-tiwai@suse.de --- include/sound/snd_wavefront.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/sound/snd_wavefront.h b/include/sound/snd_wavefront.h index 55053557c898..27f7e8a477c2 100644 --- a/include/sound/snd_wavefront.h +++ b/include/sound/snd_wavefront.h @@ -137,8 +137,4 @@ extern int snd_wavefront_fx_ioctl (struct snd_hwdep *, extern int snd_wavefront_fx_open (struct snd_hwdep *, struct file *); extern int snd_wavefront_fx_release (struct snd_hwdep *, struct file *); -/* prefix in all snd_printk() delivered messages */ - -#define LOGNAME "WaveFront: " - #endif /* __SOUND_SND_WAVEFRONT_H__ */ -- cgit v1.2.3 From fea1510719dd2a33e0ffa1eae6118c5437f7071d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:33 +0200 Subject: ALSA: emux: Use standard print API Use the standard print API with dev_*() instead of the old house-baked one. It gives better information and allows dynamically control of debug prints. Some functions are changed to receive snd_card object for calling dev_*() functions, too. 
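The shape of these conversions is roughly the following (hypothetical example, not a hunk from this patch):

	/* before */
	snd_printd("load_data: patch %d failed\n", patch);
	/* after: the snd_card pointer passed in provides the device for dev_*() */
	dev_dbg(card->dev, "load_data: patch %d failed\n", patch);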
Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-44-tiwai@suse.de --- include/sound/soundfont.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soundfont.h b/include/sound/soundfont.h index 98ed98d89d6d..8a40cc15f66d 100644 --- a/include/sound/soundfont.h +++ b/include/sound/soundfont.h @@ -86,9 +86,11 @@ struct snd_sf_list { }; /* Prototypes for soundfont.c */ -int snd_soundfont_load(struct snd_sf_list *sflist, const void __user *data, +int snd_soundfont_load(struct snd_card *card, + struct snd_sf_list *sflist, const void __user *data, long count, int client); -int snd_soundfont_load_guspatch(struct snd_sf_list *sflist, const char __user *data, +int snd_soundfont_load_guspatch(struct snd_card *card, + struct snd_sf_list *sflist, const char __user *data, long count); int snd_soundfont_close_check(struct snd_sf_list *sflist, int client); -- cgit v1.2.3 From 504dc9f5e62e3beef2554c453576e84993053647 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:44 +0200 Subject: ALSA: core: Drop snd_print stuff and co Now that all users of snd_print*() are gone, let's drop the functions completely. This also makes CONFIG_SND_VERBOSE_PRINTK redundant, and it's dropped, too. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-55-tiwai@suse.de --- include/sound/core.h | 67 ---------------------------------------------------- 1 file changed, 67 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index dfef0c9d4b9f..23401aaf3dc2 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -345,45 +345,7 @@ void release_and_free_resource(struct resource *res); /* --- */ -/* sound printk debug levels */ -enum { - SND_PR_ALWAYS, - SND_PR_DEBUG, - SND_PR_VERBOSE, -}; - -#if defined(CONFIG_SND_DEBUG) || defined(CONFIG_SND_VERBOSE_PRINTK) -__printf(4, 5) -void __snd_printk(unsigned int level, const char *file, int line, - const char *format, ...); -#else -#define __snd_printk(level, file, line, format, ...) \ - printk(format, ##__VA_ARGS__) -#endif - -/** - * snd_printk - printk wrapper - * @fmt: format string - * - * Works like printk() but prints the file and the line of the caller - * when configured with CONFIG_SND_VERBOSE_PRINTK. - */ -#define snd_printk(fmt, ...) \ - __snd_printk(0, __FILE__, __LINE__, fmt, ##__VA_ARGS__) - #ifdef CONFIG_SND_DEBUG -/** - * snd_printd - debug printk - * @fmt: format string - * - * Works like snd_printk() for debugging purposes. - * Ignored when CONFIG_SND_DEBUG is not set. - */ -#define snd_printd(fmt, ...) \ - __snd_printk(1, __FILE__, __LINE__, fmt, ##__VA_ARGS__) -#define _snd_printd(level, fmt, ...) \ - __snd_printk(level, __FILE__, __LINE__, fmt, ##__VA_ARGS__) - /** * snd_BUG - give a BUG warning message and stack trace * @@ -392,12 +354,6 @@ void __snd_printk(unsigned int level, const char *file, int line, */ #define snd_BUG() WARN(1, "BUG?\n") -/** - * snd_printd_ratelimit - Suppress high rates of output when - * CONFIG_SND_DEBUG is enabled. - */ -#define snd_printd_ratelimit() printk_ratelimit() - /** * snd_BUG_ON - debugging check macro * @cond: condition to evaluate @@ -409,11 +365,6 @@ void __snd_printk(unsigned int level, const char *file, int line, #else /* !CONFIG_SND_DEBUG */ -__printf(1, 2) -static inline void snd_printd(const char *format, ...) 
{} -__printf(2, 3) -static inline void _snd_printd(int level, const char *format, ...) {} - #define snd_BUG() do { } while (0) #define snd_BUG_ON(condition) ({ \ @@ -421,26 +372,8 @@ static inline void _snd_printd(int level, const char *format, ...) {} unlikely(__ret_warn_on); \ }) -static inline bool snd_printd_ratelimit(void) { return false; } - #endif /* CONFIG_SND_DEBUG */ -#ifdef CONFIG_SND_DEBUG_VERBOSE -/** - * snd_printdd - debug printk - * @format: format string - * - * Works like snd_printk() for debugging purposes. - * Ignored when CONFIG_SND_DEBUG_VERBOSE is not set. - */ -#define snd_printdd(format, ...) \ - __snd_printk(2, __FILE__, __LINE__, format, ##__VA_ARGS__) -#else -__printf(1, 2) -static inline void snd_printdd(const char *format, ...) {} -#endif - - #define SNDRV_OSS_VERSION ((3<<16)|(8<<8)|(1<<4)|(0)) /* 3.8.1a */ /* for easier backward-porting */ -- cgit v1.2.3 From c772a2c690182410642ead740f7a84b3a7544b2b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:10 +0300 Subject: net/mlx5: Add IFC related stuff for data direct Add IFC related stuff for data direct. Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/82da7f578a567909bb5858a64ba844fe4cc298fa.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 51 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cab228cf51c6..970c9d8473ef 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -313,6 +313,7 @@ enum { MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, + MLX5_CMD_OPCODE_QUERY_VUID = 0xb22, MLX5_CMD_OP_MAX }; @@ -1885,7 +1886,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_5a0[0x10]; u8 enhanced_cqe_compression[0x1]; - u8 reserved_at_5b1[0x2]; + u8 reserved_at_5b1[0x1]; + u8 crossing_vhca_mkey[0x1]; u8 log_max_dek[0x5]; u8 reserved_at_5b8[0x4]; u8 mini_cqe_resp_stride_index[0x1]; @@ -1954,7 +1956,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 dynamic_msix_table_size[0xc]; u8 reserved_at_740[0xc]; u8 min_dynamic_vf_msix_table_size[0x4]; - u8 reserved_at_750[0x4]; + u8 reserved_at_750[0x2]; + u8 data_direct[0x1]; + u8 reserved_at_753[0x1]; u8 max_dynamic_vf_msix_table_size[0xc]; u8 reserved_at_760[0x3]; @@ -1982,7 +1986,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; u8 migratable[0x1]; - u8 reserved_at_81[0x1f]; + u8 reserved_at_81[0x11]; + u8 query_vuid[0x1]; + u8 reserved_at_93[0xd]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; @@ -4154,6 +4160,7 @@ enum { MLX5_MKC_ACCESS_MODE_KSM = 0x3, MLX5_MKC_ACCESS_MODE_SW_ICM = 0x4, MLX5_MKC_ACCESS_MODE_MEMIC = 0x5, + MLX5_MKC_ACCESS_MODE_CROSSING = 0x6, }; struct mlx5_ifc_mkc_bits { @@ -4196,7 +4203,10 @@ struct mlx5_ifc_mkc_bits { u8 bsf_octword_size[0x20]; - u8 reserved_at_120[0x80]; + u8 reserved_at_120[0x60]; + + u8 crossing_target_vhca_id[0x10]; + u8 reserved_at_190[0x10]; u8 translations_octword_size[0x20]; @@ -5124,6 +5134,36 @@ struct mlx5_ifc_query_vport_state_out_bits { u8 state[0x4]; }; +struct mlx5_ifc_array1024_auto_bits { + u8 array1024_auto[32][0x20]; +}; + +struct mlx5_ifc_query_vuid_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x40]; + + u8 query_vfs_vuid[0x1]; + u8 data_direct[0x1]; + u8 reserved_at_62[0xe]; + u8 vhca_id[0x10]; +}; + +struct mlx5_ifc_query_vuid_out_bits { + u8 
status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1a0]; + + u8 reserved_at_1e0[0x10]; + u8 num_of_entries[0x10]; + + struct mlx5_ifc_array1024_auto_bits vuid[]; +}; + enum { MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT = 0x0, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT = 0x1, @@ -8989,7 +9029,8 @@ struct mlx5_ifc_create_mkey_in_bits { u8 pg_access[0x1]; u8 mkey_umem_valid[0x1]; - u8 reserved_at_62[0x1e]; + u8 data_direct[0x1]; + u8 reserved_at_63[0x1d]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; -- cgit v1.2.3 From abf3a3ea9acb5c886c8729191a670744ecd42024 Mon Sep 17 00:00:00 2001 From: David Virag Date: Tue, 6 Aug 2024 14:11:44 +0200 Subject: dt-bindings: clock: exynos7885: Fix duplicated binding The numbering in Exynos7885's FSYS CMU bindings has 4 duplicated by accident, with the rest of the bindings continuing with 5. Fix this by moving CLK_MOUT_FSYS_USB30DRD_USER to the end as 11. Since CLK_MOUT_FSYS_USB30DRD_USER is not used in any device tree as of now, and there are no other clocks affected (maybe apart from CLK_MOUT_FSYS_MMC_SDIO_USER which the number was shared with, also not used in a device tree), this is the least impactful way to solve this problem. Fixes: cd268e309c29 ("dt-bindings: clock: Add bindings for Exynos7885 CMU_FSYS") Cc: stable@vger.kernel.org Signed-off-by: David Virag Link: https://lore.kernel.org/r/20240806121157.479212-2-virag.david003@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/exynos7885.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos7885.h b/include/dt-bindings/clock/exynos7885.h index 255e3aa94323..54cfccff8508 100644 --- a/include/dt-bindings/clock/exynos7885.h +++ b/include/dt-bindings/clock/exynos7885.h @@ -136,12 +136,12 @@ #define CLK_MOUT_FSYS_MMC_CARD_USER 2 #define CLK_MOUT_FSYS_MMC_EMBD_USER 3 #define CLK_MOUT_FSYS_MMC_SDIO_USER 4 -#define CLK_MOUT_FSYS_USB30DRD_USER 4 #define CLK_GOUT_MMC_CARD_ACLK 5 #define CLK_GOUT_MMC_CARD_SDCLKIN 6 #define CLK_GOUT_MMC_EMBD_ACLK 7 #define CLK_GOUT_MMC_EMBD_SDCLKIN 8 #define CLK_GOUT_MMC_SDIO_ACLK 9 #define CLK_GOUT_MMC_SDIO_SDCLKIN 10 +#define CLK_MOUT_FSYS_USB30DRD_USER 11 #endif /* _DT_BINDINGS_CLOCK_EXYNOS_7885_H */ -- cgit v1.2.3 From 59baa83e30f82b74b4c7dc07c20eac9899b6c0c6 Mon Sep 17 00:00:00 2001 From: David Virag Date: Tue, 6 Aug 2024 14:11:45 +0200 Subject: dt-bindings: clock: exynos7885: Add CMU_TOP PLL MUX indices Add indices for missing MUX clocks from PLLs in CMU_TOP. 
Signed-off-by: David Virag Link: https://lore.kernel.org/r/20240806121157.479212-3-virag.david003@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/exynos7885.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos7885.h b/include/dt-bindings/clock/exynos7885.h index 54cfccff8508..4ce86810b10d 100644 --- a/include/dt-bindings/clock/exynos7885.h +++ b/include/dt-bindings/clock/exynos7885.h @@ -69,6 +69,8 @@ #define CLK_GOUT_FSYS_MMC_EMBD 58 #define CLK_GOUT_FSYS_MMC_SDIO 59 #define CLK_GOUT_FSYS_USB30DRD 60 +#define CLK_MOUT_SHARED0_PLL 61 +#define CLK_MOUT_SHARED1_PLL 62 /* CMU_CORE */ #define CLK_MOUT_CORE_BUS_USER 1 -- cgit v1.2.3 From b9dee49cc6f9efa97eee059d03b704dec0f45658 Mon Sep 17 00:00:00 2001 From: David Virag Date: Tue, 6 Aug 2024 14:11:46 +0200 Subject: dt-bindings: clock: exynos7885: Add indices for USB clocks Exynos7885 SoC has a DWC3 USB Controller with Exynos USB PHY which in theory supports USB3 SuperSpeed, but is only used as USB2 in all known devices. These, of course, need some clocks. Add indices for these clocks. Signed-off-by: David Virag Link: https://lore.kernel.org/r/20240806121157.479212-4-virag.david003@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/exynos7885.h | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos7885.h b/include/dt-bindings/clock/exynos7885.h index 4ce86810b10d..cfede84b46b9 100644 --- a/include/dt-bindings/clock/exynos7885.h +++ b/include/dt-bindings/clock/exynos7885.h @@ -134,16 +134,24 @@ #define CLK_GOUT_WDT1_PCLK 43 /* CMU_FSYS */ -#define CLK_MOUT_FSYS_BUS_USER 1 -#define CLK_MOUT_FSYS_MMC_CARD_USER 2 -#define CLK_MOUT_FSYS_MMC_EMBD_USER 3 -#define CLK_MOUT_FSYS_MMC_SDIO_USER 4 -#define CLK_GOUT_MMC_CARD_ACLK 5 -#define CLK_GOUT_MMC_CARD_SDCLKIN 6 -#define CLK_GOUT_MMC_EMBD_ACLK 7 -#define CLK_GOUT_MMC_EMBD_SDCLKIN 8 -#define CLK_GOUT_MMC_SDIO_ACLK 9 -#define CLK_GOUT_MMC_SDIO_SDCLKIN 10 -#define CLK_MOUT_FSYS_USB30DRD_USER 11 +#define CLK_MOUT_FSYS_BUS_USER 1 +#define CLK_MOUT_FSYS_MMC_CARD_USER 2 +#define CLK_MOUT_FSYS_MMC_EMBD_USER 3 +#define CLK_MOUT_FSYS_MMC_SDIO_USER 4 +#define CLK_GOUT_MMC_CARD_ACLK 5 +#define CLK_GOUT_MMC_CARD_SDCLKIN 6 +#define CLK_GOUT_MMC_EMBD_ACLK 7 +#define CLK_GOUT_MMC_EMBD_SDCLKIN 8 +#define CLK_GOUT_MMC_SDIO_ACLK 9 +#define CLK_GOUT_MMC_SDIO_SDCLKIN 10 +#define CLK_MOUT_FSYS_USB30DRD_USER 11 +#define CLK_MOUT_USB_PLL 12 +#define CLK_FOUT_USB_PLL 13 +#define CLK_FSYS_USB20PHY_CLKCORE 14 +#define CLK_FSYS_USB30DRD_ACLK_20PHYCTRL 15 +#define CLK_FSYS_USB30DRD_ACLK_30PHYCTRL_0 16 +#define CLK_FSYS_USB30DRD_ACLK_30PHYCTRL_1 17 +#define CLK_FSYS_USB30DRD_BUS_CLK_EARLY 18 +#define CLK_FSYS_USB30DRD_REF_CLK 19 #endif /* _DT_BINDINGS_CLOCK_EXYNOS_7885_H */ -- cgit v1.2.3 From 599f6899051cb70c4e0aa9fd591b9ee220cb6f14 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 7 Aug 2024 09:22:10 +0200 Subject: media: uapi/linux/cec.h: cec_msg_set_reply_to: zero flags The cec_msg_set_reply_to() helper function never zeroed the struct cec_msg flags field, this can cause unexpected behavior if flags was uninitialized to begin with. 
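A small userspace check of the corrected behaviour could look like this, assuming uapi headers that already carry the fix:

	#include <linux/cec.h>
	#include <string.h>
	#include <stdio.h>

	int main(void)
	{
		struct cec_msg orig, reply;

		memset(&orig, 0, sizeof(orig));
		orig.len = 2;
		orig.msg[0] = (CEC_LOG_ADDR_TV << 4) | CEC_LOG_ADDR_PLAYBACK_1;
		orig.msg[1] = CEC_MSG_GIVE_DEVICE_POWER_STATUS;

		memset(&reply, 0, sizeof(reply));
		reply.flags = CEC_MSG_FL_REPLY_TO_FOLLOWERS;	/* pretend leftover state */
		cec_msg_set_reply_to(&reply, &orig);
		printf("reply.flags = %u\n", reply.flags);	/* 0 with this fix */
		return 0;
	}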
Signed-off-by: Hans Verkuil Fixes: 0dbacebede1e ("[media] cec: move the CEC framework out of staging and to media") Cc: Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index 894fffc66f2c..b2af1dddd4d7 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -132,6 +132,8 @@ static inline void cec_msg_init(struct cec_msg *msg, * Set the msg destination to the orig initiator and the msg initiator to the * orig destination. Note that msg and orig may be the same pointer, in which * case the change is done in place. + * + * It also zeroes the reply, timeout and flags fields. */ static inline void cec_msg_set_reply_to(struct cec_msg *msg, struct cec_msg *orig) @@ -139,7 +141,9 @@ static inline void cec_msg_set_reply_to(struct cec_msg *msg, /* The destination becomes the initiator and vice versa */ msg->msg[0] = (cec_msg_destination(orig) << 4) | cec_msg_initiator(orig); - msg->reply = msg->timeout = 0; + msg->reply = 0; + msg->timeout = 0; + msg->flags = 0; } /** -- cgit v1.2.3 From a09cdb8f564613769142a60400bb5160864c3269 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Aug 2024 12:41:17 +0200 Subject: genirq: Remove unused irq_chip_generic:: {type,polarity}_cache The type_cache and polarity_cache members of struct irq_chip_generic are unused. Remove them both along with their kernel-doc. Found by https://github.com/jirislaby/clang-struct. Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240808104118.430670-2-jirislaby@kernel.org --- include/linux/irq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1f5dbf1f92c9..00490d6ead65 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1040,8 +1040,6 @@ struct irq_chip_type { * @irq_base: Interrupt base nr for this chip * @irq_cnt: Number of interrupts handled by this chip * @mask_cache: Cached mask register shared between all chip types - * @type_cache: Cached type register - * @polarity_cache: Cached polarity register * @wake_enabled: Interrupt can wakeup from suspend * @wake_active: Interrupt is marked as an wakeup from suspend source * @num_ct: Number of available irq_chip_type instances (usually 1) @@ -1068,8 +1066,6 @@ struct irq_chip_generic { unsigned int irq_base; unsigned int irq_cnt; u32 mask_cache; - u32 type_cache; - u32 polarity_cache; u32 wake_enabled; u32 wake_active; unsigned int num_ct; -- cgit v1.2.3 From 60029162a0458832ab2bcfc6fd4986bfd9ca0f55 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Aug 2024 12:41:18 +0200 Subject: genirq: Remove irq_chip_regs:: Polarity The polarity member of struct irq_chip_regs is unused. Remove it along with its kernel-doc. Found by https://github.com/jirislaby/clang-struct. 
Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240808104118.430670-3-jirislaby@kernel.org --- include/linux/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 00490d6ead65..fa711f80957b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -991,7 +991,6 @@ void irq_init_desc(unsigned int irq); * @ack: Ack register offset to reg_base * @eoi: Eoi register offset to reg_base * @type: Type configuration register offset to reg_base - * @polarity: Polarity configuration register offset to reg_base */ struct irq_chip_regs { unsigned long enable; @@ -1000,7 +999,6 @@ struct irq_chip_regs { unsigned long ack; unsigned long eoi; unsigned long type; - unsigned long polarity; }; /** -- cgit v1.2.3 From b54de55990b0467538c6bb33523b28816384958a Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 7 Aug 2024 17:06:12 +0100 Subject: net: ethtool: fix off-by-one error in max RSS context IDs Both ethtool_ops.rxfh_max_context_id and the default value used when it's not specified are supposed to be exclusive maxima (the former is documented as such; the latter, U32_MAX, cannot be used as an ID since it equals ETH_RXFH_CONTEXT_ALLOC), but xa_alloc() expects an inclusive maximum. Subtract one from 'limit' to produce an inclusive maximum, and pass that to xa_alloc(). Increase bnxt's max by one to prevent a (very minor) regression, as BNXT_MAX_ETH_RSS_CTX is an inclusive max. This is safe since bnxt is not actually hard-limited; BNXT_MAX_ETH_RSS_CTX is just a leftover from old driver code that managed context IDs itself. Rename rxfh_max_context_id to rxfh_max_num_contexts to make its semantics (hopefully) more obvious. Fixes: 847a8ab18676 ("net: ethtool: let the core choose RSS context IDs") Signed-off-by: Edward Cree Link: https://patch.msgid.link/5a2d11a599aa5b0cc6141072c01accfb7758650c.1723045898.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 303fda54ef17..989c94eddb2b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -736,10 +736,10 @@ struct kernel_ethtool_ts_info { * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). - * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this - * is zero then the core may choose any (nonzero) ID, otherwise the core - * will only use IDs strictly less than this value, as the @rss_context - * argument to @create_rxfh_context and friends. + * @rxfh_max_num_contexts: maximum (exclusive) supported RSS context ID. + * If this is zero then the core may choose any (nonzero) ID, otherwise + * the core will only use IDs strictly less than this value, as the + * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. 
Modern drivers no @@ -954,7 +954,7 @@ struct ethtool_ops { u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; - u32 rxfh_max_context_id; + u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); -- cgit v1.2.3 From 5819e464a17587e6830cfab05f3e91a9a8753a41 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 8 Aug 2024 14:08:08 +1000 Subject: cpumask: Fix crash on updating CPU enabled mask The CPU enabled mask instead of the CPU possible mask should be used by set_cpu_enabled(). Otherwise, we run into crash due to write to the read-only CPU possible mask when vCPU is hot added on ARM64. (qemu) device_add host-arm-cpu,id=cpu1,socket-id=1 Unable to handle kernel write to read-only memory at virtual address ffff800080fa7190 : Call trace: register_cpu+0x1a4/0x2e8 arch_register_cpu+0x84/0xd8 acpi_processor_add+0x480/0x5b0 acpi_bus_attach+0x1c4/0x300 acpi_dev_for_one_check+0x3c/0x50 device_for_each_child+0x68/0xc8 acpi_dev_for_each_child+0x48/0x80 acpi_bus_attach+0x84/0x300 acpi_bus_scan+0x74/0x220 acpi_scan_rescan_bus+0x54/0x88 acpi_device_hotplug+0x208/0x478 acpi_hotplug_work_fn+0x2c/0x50 process_one_work+0x15c/0x3c0 worker_thread+0x2ec/0x400 kthread+0x120/0x130 ret_from_fork+0x10/0x20 Fix it by passing the CPU enabled mask instead of the CPU possible mask to set_cpu_enabled(). Fixes: 51c4767503d5 ("Merge tag 'bitmap-6.11-rc1' of https://github.com:/norov/linux") Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Signed-off-by: Yury Norov --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 801a7e524113..53158de44b83 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1037,7 +1037,7 @@ void init_cpu_online(const struct cpumask *src); assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) #define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) -#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_possible_mask, (enabled)) +#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_enabled_mask, (enabled)) #define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) #define set_cpu_active(cpu, active) assign_cpu((cpu), &__cpu_active_mask, (active)) #define set_cpu_dying(cpu, dying) assign_cpu((cpu), &__cpu_dying_mask, (dying)) -- cgit v1.2.3 From 3882dccf48f9fbe787b5df3187d708ef348ac860 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 8 Aug 2024 16:05:57 +0100 Subject: bpf/bpf_get,set_sockopt: add option to set TCP-BPF sock ops flags Currently the only opportunity to set sock ops flags dictating which callbacks fire for a socket is from within a TCP-BPF sockops program. This is problematic if the connection is already set up as there is no further chance to specify callbacks for that socket. Add TCP_BPF_SOCK_OPS_CB_FLAGS to bpf_setsockopt() and bpf_getsockopt() to allow users to specify callbacks later, either via an iterator over sockets or via a socket-specific program triggered by a setsockopt() on the socket. Previous discussion on this here [1]. 
[1] https://lore.kernel.org/bpf/f42f157b-6e52-dd4d-3d97-9b86c84c0b00@oracle.com/ Signed-off-by: Alan Maguire Link: https://lore.kernel.org/r/20240808150558.1035626-2-alan.maguire@oracle.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 35bcf52dbc65..e05b39e39c3f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2851,7 +2851,7 @@ union bpf_attr { * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, - * **TCP_BPF_RTO_MIN**. + * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports the following *optname*\ s: * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. @@ -7080,6 +7080,7 @@ enum { TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ + TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ }; enum { -- cgit v1.2.3 From 09612576046a3cd29bd2b2b88c5813b1dfe96775 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 7 Aug 2024 14:22:26 +0200 Subject: net: sungem_phy: Constify struct mii_phy_def 'struct mii_phy_def' are not modified in this driver. Constifying these structures moves some data to a read-only section, so increase overall security. While at it fix the checkpatch warning related to this patch (some missing newlines and spaces around *) On a x86_64, with allmodconfig: Before: ====== 27709 928 0 28637 6fdd drivers/net/sungem_phy.o After: ===== text data bss dec hex filename 28157 476 0 28633 6fd9 drivers/net/sungem_phy.o Signed-off-by: Christophe JAILLET Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/54c3b30930f80f4895e6fa2f4234714fdea4ef4e.1723033266.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/linux/sungem_phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sungem_phy.h b/include/linux/sungem_phy.h index c505f30e8b68..eecc7eb63bfb 100644 --- a/include/linux/sungem_phy.h +++ b/include/linux/sungem_phy.h @@ -40,7 +40,7 @@ enum { /* An instance of a PHY, partially borrowed from mii_if_info */ struct mii_phy { - struct mii_phy_def* def; + const struct mii_phy_def *def; u32 advertising; int mii_id; -- cgit v1.2.3 From 5fcf329676cf9d132842d9871f83a091c32f3bfc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 19 Jul 2024 13:41:50 +0200 Subject: fs: add put_mnt_ns() cleanup helper Add a simple helper to put a mount namespace reference. 
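DEFINE_FREE() from <linux/cleanup.h> enables scope-based cleanup, so a future caller could hold the reference as in the sketch below; the lookup function is made up for illustration:

	struct mnt_namespace *mnt_ns __free(put_mnt_ns) =
			lookup_some_mnt_ns(arg);	/* hypothetical getter returning a reference */

	if (IS_ERR_OR_NULL(mnt_ns))
		return -EINVAL;
	/* use mnt_ns; put_mnt_ns() runs automatically on every return path */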
Link: https://lore.kernel.org/r/20240719-work-mount-namespace-v1-3-834113cab0d2@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/mnt_namespace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 8f882f5881e8..70b366b64816 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -3,6 +3,9 @@ #define _NAMESPACE_H_ #ifdef __KERNEL__ +#include +#include + struct mnt_namespace; struct fs_struct; struct user_namespace; @@ -11,6 +14,7 @@ struct ns_common; extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); +DEFINE_FREE(put_mnt_ns, struct mnt_namespace *, if (!IS_ERR_OR_NULL(_T)) put_mnt_ns(_T)) extern struct ns_common *from_mnt_ns(struct mnt_namespace *); extern const struct file_operations proc_mounts_operations; -- cgit v1.2.3 From 257b1c2c78c25643526609dee0c15f1544eb3252 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 19 Jul 2024 13:41:51 +0200 Subject: file: add fput() cleanup helper Add a simple helper to put a file reference. Link: https://lore.kernel.org/r/20240719-work-mount-namespace-v1-4-834113cab0d2@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/file.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739..d1e768b06069 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -11,6 +11,7 @@ #include #include #include +#include struct file; @@ -96,6 +97,7 @@ extern void put_unused_fd(unsigned int fd); DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), get_unused_fd_flags(flags), unsigned flags) +DEFINE_FREE(fput, struct file *, if (!IS_ERR_OR_NULL(_T)) fput(_T)) /* * take_fd() will take care to set @fd to -EBADF ensuring that -- cgit v1.2.3 From a1d220d9dafa8d76ba60a784a1016c3134e6a1e8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 19 Jul 2024 13:41:52 +0200 Subject: nsfs: iterate through mount namespaces It is already possible to list mounts in other mount namespaces and to retrieve namespace file descriptors without having to go through procfs by deriving them from pidfds. Augment these abilities by adding the ability to retrieve information about a mount namespace via NS_MNT_GET_INFO. This will return the mount namespace id and the number of mounts currently in the mount namespace. The number of mounts can be used to size the buffer that needs to be used for listmount() and is in general useful without having to actually iterate through all the mounts. The structure is extensible. And add the ability to iterate through all mount namespaces over which the caller holds privilege returning the file descriptor for the next or previous mount namespace. To retrieve a mount namespace the caller must be privileged wrt to it's owning user namespace. This means that PID 1 on the host can list all mounts in all mount namespaces or that a container can list all mounts of its nested containers. Optionally pass a structure for NS_MNT_GET_INFO with NS_MNT_GET_{PREV,NEXT} to retrieve information about the mount namespace in one go. Both ioctls can be implemented for other namespace types easily. 
Together with recent api additions this means one can iterate through all mounts in all mount namespaces without ever touching procfs. Link: https://lore.kernel.org/r/20240719-work-mount-namespace-v1-5-834113cab0d2@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index b133211331f6..cb31af3c1840 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -3,6 +3,7 @@ #define __LINUX_NSFS_H #include +#include #define NSIO 0xb7 @@ -26,4 +27,19 @@ /* Return thread-group leader id of pid in the target pid namespace. */ #define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From f428cc9eac6e29d57579be4978ba210c344322ea Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Aug 2024 12:42:29 +0200 Subject: ALSA: control: Rename ctl_files_rwlock to controls_rwlock We'll re-use the existing rwlock for the protection of control list lookup, too, and now rename it to a more generic name. This is a preliminary change, only the rename of the struct field here, no functional changes. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240809104234.8488-2-tiwai@suse.de --- include/sound/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index dfef0c9d4b9f..55607e91d5fd 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -99,7 +99,7 @@ struct snd_card { struct device *ctl_dev; /* control device */ unsigned int last_numid; /* last used numeric ID */ struct rw_semaphore controls_rwsem; /* controls lock (list and values) */ - rwlock_t ctl_files_rwlock; /* ctl_files list lock */ + rwlock_t controls_rwlock; /* lock for ctl_files list */ int controls_count; /* count of all controls */ size_t user_ctl_alloc_size; // current memory allocation by user controls. struct list_head controls; /* all controls for this card */ -- cgit v1.2.3 From 38ea4c3dc306edf6f4e483e8ad9cb8d33943afde Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Aug 2024 12:42:30 +0200 Subject: ALSA: control: Optimize locking for look-up For a fast look-up of a control element via either numid or name matching (enabled via CONFIG_SND_CTL_FAST_LOOKUP), a locking isn't needed at all thanks to Xarray. OTOH, the locking is still needed for a slow linked-list traversal, and that's rather a rare case. In this patch, we reduce the use of locking at snd_ctl_find_*() API functions, and switch from controls_rwsem to controls_rwlock for avoiding unnecessary lock inversions. This also resulted in a nice cleanup, as *_unlocked() version of snd_ctl_find_*() APIs can be dropped. snd_ctl_find_id_mixer_unlocked() is still left just as an alias of snd_ctl_find_id_mixer(), since soc-card.c has a wrapper and there are several users. Once after converting there, we can remove it later. 
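As a rough illustration (not taken from this patch), a driver can now look up one of its controls by name and signal a value change without holding controls_rwsem around the lookup; the control name below is arbitrary, and the control is assumed not to be removed concurrently:

        struct snd_kcontrol *kctl;

        kctl = snd_ctl_find_id_mixer(card, "Master Playback Volume");
        if (kctl)
                snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &kctl->id);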
Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240809104234.8488-3-tiwai@suse.de --- include/sound/control.h | 22 ++-------------------- include/sound/core.h | 2 +- 2 files changed, 3 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/sound/control.h b/include/sound/control.h index c1659036c4a7..4851a86c72ef 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -140,9 +140,7 @@ int snd_ctl_remove_id(struct snd_card * card, struct snd_ctl_elem_id *id); int snd_ctl_rename_id(struct snd_card * card, struct snd_ctl_elem_id *src_id, struct snd_ctl_elem_id *dst_id); void snd_ctl_rename(struct snd_card *card, struct snd_kcontrol *kctl, const char *name); int snd_ctl_activate_id(struct snd_card *card, struct snd_ctl_elem_id *id, int active); -struct snd_kcontrol *snd_ctl_find_numid_locked(struct snd_card *card, unsigned int numid); struct snd_kcontrol *snd_ctl_find_numid(struct snd_card *card, unsigned int numid); -struct snd_kcontrol *snd_ctl_find_id_locked(struct snd_card *card, const struct snd_ctl_elem_id *id); struct snd_kcontrol *snd_ctl_find_id(struct snd_card *card, const struct snd_ctl_elem_id *id); /** @@ -167,27 +165,11 @@ snd_ctl_find_id_mixer(struct snd_card *card, const char *name) return snd_ctl_find_id(card, &id); } -/** - * snd_ctl_find_id_mixer_locked - find the control instance with the given name string - * @card: the card instance - * @name: the name string - * - * Finds the control instance with the given name and - * @SNDRV_CTL_ELEM_IFACE_MIXER. Other fields are set to zero. - * - * This is merely a wrapper to snd_ctl_find_id_locked(). - * The caller must down card->controls_rwsem before calling this function. - * - * Return: The pointer of the instance if found, or %NULL if not. - */ +/* to be removed */ static inline struct snd_kcontrol * snd_ctl_find_id_mixer_locked(struct snd_card *card, const char *name) { - struct snd_ctl_elem_id id = {}; - - id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; - strscpy(id.name, name, sizeof(id.name)); - return snd_ctl_find_id_locked(card, &id); + return snd_ctl_find_id_mixer(card, name); } int snd_ctl_create(struct snd_card *card); diff --git a/include/sound/core.h b/include/sound/core.h index 55607e91d5fd..5c418ab24aa4 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -99,7 +99,7 @@ struct snd_card { struct device *ctl_dev; /* control device */ unsigned int last_numid; /* last used numeric ID */ struct rw_semaphore controls_rwsem; /* controls lock (list and values) */ - rwlock_t controls_rwlock; /* lock for ctl_files list */ + rwlock_t controls_rwlock; /* lock for lookup and ctl_files list */ int controls_count; /* count of all controls */ size_t user_ctl_alloc_size; // current memory allocation by user controls. struct list_head controls; /* all controls for this card */ -- cgit v1.2.3 From 9cacb32a0ba691c2859a20bd5e30b71cc592fad2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Aug 2024 12:42:31 +0200 Subject: ASoC: Drop snd_soc_*_get_kcontrol_locked() The recent cleanup in ALSA control core made no difference between snd_ctl_find_id_mixer() and snd_ctl_find_id_mixer_locked(), and the latter is to be dropped. The only user of the left API was ASoC, and that's snd_soc_card_get_kcontrol_locked() and snd_soc_component_get_kcontrol_locked(). This patch drops those functions and rewrites those users to call the variant without locked instead. The test of the API became superfluous, hence dropped as well. 
As all callers of snd_ctl_find_id_mixer_locked() are gone, snd_ctl_find_id_mixer_locked() is finally dropped, too. Reviewed-by: Mark Brown Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240809104234.8488-4-tiwai@suse.de --- include/sound/control.h | 7 ------- include/sound/soc-card.h | 2 -- include/sound/soc-component.h | 3 --- 3 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/sound/control.h b/include/sound/control.h index 4851a86c72ef..e31bd8e36bfa 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -165,13 +165,6 @@ snd_ctl_find_id_mixer(struct snd_card *card, const char *name) return snd_ctl_find_id(card, &id); } -/* to be removed */ -static inline struct snd_kcontrol * -snd_ctl_find_id_mixer_locked(struct snd_card *card, const char *name) -{ - return snd_ctl_find_id_mixer(card, name); -} - int snd_ctl_create(struct snd_card *card); int snd_ctl_register_ioctl(snd_kctl_ioctl_func_t fcn); diff --git a/include/sound/soc-card.h b/include/sound/soc-card.h index 1f4c39922d82..ecc02e955279 100644 --- a/include/sound/soc-card.h +++ b/include/sound/soc-card.h @@ -30,8 +30,6 @@ static inline void snd_soc_card_mutex_unlock(struct snd_soc_card *card) struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, const char *name); -struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card, - const char *name); int snd_soc_card_jack_new(struct snd_soc_card *card, const char *id, int type, struct snd_soc_jack *jack); int snd_soc_card_jack_new_pins(struct snd_soc_card *card, const char *id, diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index bf2e381cd124..61534ac0edd1 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -464,9 +464,6 @@ int snd_soc_component_force_enable_pin_unlocked( /* component controls */ struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, const char * const ctl); -struct snd_kcontrol * -snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, - const char * const ctl); int snd_soc_component_notify_control(struct snd_soc_component *component, const char * const ctl); -- cgit v1.2.3 From 7ff7509fa52397455e04bd44982e9dfbbd19457f Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 29 Jul 2024 11:36:26 +0200 Subject: PCI: Make pcim_request_region() a public function pcim_request_region() is the managed counterpart of pci_request_region(). It is currently only used internally for PCI. It can be useful for a number of drivers and exporting it is a step towards deprecating more complicated functions. Make pcim_request_region() a public function. 
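A minimal sketch of the intended use in a probe path (the my_* names are invented; only pcim_request_region() comes from this patch, the other calls are existing managed PCI helpers):

static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        void __iomem *regs;
        int ret;

        ret = pcim_enable_device(pdev);
        if (ret)
                return ret;

        ret = pcim_request_region(pdev, 0, "my_driver");
        if (ret)
                return ret;

        regs = pcim_iomap(pdev, 0, 0);
        if (!regs)
                return -ENOMEM;

        /* the region and the mapping are released automatically on unbind */
        return 0;
}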
Link: https://lore.kernel.org/r/20240729093625.17561-4-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Tested-by: Hans de Goede Reviewed-by: Hans de Goede Acked-by: Hans de Goede --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..4246cb790c7b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2292,6 +2292,7 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); +int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, const char *name); -- cgit v1.2.3 From d140f80f603584d8282817994c0c6241e736bef4 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 7 Aug 2024 10:30:18 +0200 Subject: PCI: Deprecate pcim_iomap_regions() in favor of pcim_iomap_region() pcim_iomap_regions() is a complicated function that uses a bit mask to determine the BARs the user wishes to request and ioremap. Almost all users only ever set a single bit in that mask, making that mechanism questionable. pcim_iomap_region() is now available as a more simple replacement. Make pcim_iomap_region() a public function. Mark pcim_iomap_regions() as deprecated. Link: https://lore.kernel.org/r/20240807083018.8734-2-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4246cb790c7b..aa9ef7890c50 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2290,6 +2290,8 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, #endif void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); +void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, + const char *name); void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); -- cgit v1.2.3 From 70114e7f7585ef078c2b7033ee14218f95f55e22 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 8 Aug 2024 15:34:02 +0300 Subject: irqdomain: Simplify simple and legacy domain creation irq_domain_create_simple() and irq_domain_create_legacy() use __irq_domain_instantiate(), but have extra handling of allocating interrupt descriptors and associating interrupts in them. Some of that is duplicated. There are also call sites which have conditonals to invoke different interrupt domain creator functions, where one of them is usually irq_domain_create_legacy(). Alternatively they associate the interrupts for the legacy case after creating the domain. Moving the extra logic of irq_domain_create_simple()/legacy() into __irq_domain_instantiate() allows to consolidate that. Introduce hwirq_base and virq_base members in the irq_domain_info structure, which allows to transport the required information and add the conditional interrupt descriptor allocation and interrupt association into __irq_domain_instantiate(). 
This reduces irq_domain_create_legacy() and irq_domain_create_simple() to trivial wrappers which fill in the info structure and allows call sites which must support the legacy case along with more modern mechanism to select the domain type via the parameters of the info struct. [ tglx: Massaged change log ] Suggested-by: Thomas Gleixner Signed-off-by: Matti Vaittinen Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/32d07bd79eb2b5416e24da9e9e8fe5955423dcf9.1723120028.git.mazziesaccount@gmail.com --- include/linux/irqdomain.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index de6105f68fec..bfcffa2c7047 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -291,6 +291,9 @@ struct irq_domain_chip_generic_info; * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; * Use ~0 for no limit; 0 for no direct mapping + * @hwirq_base: The first hardware interrupt number (legacy domains only) + * @virq_base: The first Linux interrupt number for legacy domains to + * immediately associate the interrupts after domain creation * @bus_token: Domain bus token * @ops: Domain operation callbacks * @host_data: Controller private data pointer @@ -307,6 +310,8 @@ struct irq_domain_info { unsigned int size; irq_hw_number_t hwirq_max; int direct_max; + unsigned int hwirq_base; + unsigned int virq_base; enum irq_domain_bus_token bus_token; const struct irq_domain_ops *ops; void *host_data; -- cgit v1.2.3 From 1e7c05292531e5b6bebe409cd531ed4ec0b2ff56 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 8 Aug 2024 22:23:06 +0200 Subject: irqdomain: Allow giving name suffix for domain Devices can provide multiple interrupt lines. One reason for this is that a device has multiple subfunctions, each providing its own interrupt line. Another reason is that a device can be designed to be used (also) on a system where some of the interrupts can be routed to another processor. A line often further acts as a demultiplex for specific interrupts and has it's respective set of interrupt (status, mask, ack, ...) registers. Regmap supports the handling of these registers and demultiplexing interrupts, but the interrupt domain code ends up assigning the same name for the per interrupt line domains. This causes a naming collision in the debugFS code and leads to confusion, as /proc/interrupts shows two separate interrupts with the same domain name and hardware interrupt number. Instead of adding a workaround in regmap or driver code, allow giving a name suffix for the domain name when the domain is created. Add a name_suffix field in the irq_domain_info structure and make irq_domain_instantiate() use this suffix if it is given when a domain is created. 
[ tglx: Adopt it to the cleanup patch and fixup the invalid NULL return ] Signed-off-by: Matti Vaittinen Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/871q2yvk5x.ffs@tglx --- include/linux/irqdomain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index bfcffa2c7047..e432b6a12a32 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -295,6 +295,8 @@ struct irq_domain_chip_generic_info; * @virq_base: The first Linux interrupt number for legacy domains to * immediately associate the interrupts after domain creation * @bus_token: Domain bus token + * @name_suffix: Optional name suffix to avoid collisions when multiple + * domains are added using same fwnode * @ops: Domain operation callbacks * @host_data: Controller private data pointer * @dgc_info: Geneneric chip information structure pointer used to @@ -313,6 +315,7 @@ struct irq_domain_info { unsigned int hwirq_base; unsigned int virq_base; enum irq_domain_bus_token bus_token; + const char *name_suffix; const struct irq_domain_ops *ops; void *host_data; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -- cgit v1.2.3 From da4fe6815aca25603944a64b0965310512e867d0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Aug 2024 14:09:00 +0800 Subject: Revert "lib/mpi: Introduce ec implementation to MPI library" This reverts commit d58bb7e55a8a65894cc02f27c3e2bf9403e7c40f. It's no longer needed since sm2 has been removed. Signed-off-by: Herbert Xu --- include/linux/mpi.h | 105 ---------------------------------------------------- 1 file changed, 105 deletions(-) (limited to 'include') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index eb0d1c1db208..89b720893e12 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -157,111 +157,6 @@ void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor); /*-- mpi-inv.c --*/ int mpi_invm(MPI x, MPI a, MPI n); -/*-- ec.c --*/ - -/* Object to represent a point in projective coordinates */ -struct gcry_mpi_point { - MPI x; - MPI y; - MPI z; -}; - -typedef struct gcry_mpi_point *MPI_POINT; - -/* Models describing an elliptic curve */ -enum gcry_mpi_ec_models { - /* The Short Weierstrass equation is - * y^2 = x^3 + ax + b - */ - MPI_EC_WEIERSTRASS = 0, - /* The Montgomery equation is - * by^2 = x^3 + ax^2 + x - */ - MPI_EC_MONTGOMERY, - /* The Twisted Edwards equation is - * ax^2 + y^2 = 1 + bx^2y^2 - * Note that we use 'b' instead of the commonly used 'd'. - */ - MPI_EC_EDWARDS -}; - -/* Dialects used with elliptic curves */ -enum ecc_dialects { - ECC_DIALECT_STANDARD = 0, - ECC_DIALECT_ED25519, - ECC_DIALECT_SAFECURVE -}; - -/* This context is used with all our EC functions. */ -struct mpi_ec_ctx { - enum gcry_mpi_ec_models model; /* The model describing this curve. */ - enum ecc_dialects dialect; /* The ECC dialect used with the curve. */ - int flags; /* Public key flags (not always used). */ - unsigned int nbits; /* Number of bits. */ - - /* Domain parameters. Note that they may not all be set and if set - * the MPIs may be flagged as constant. - */ - MPI p; /* Prime specifying the field GF(p). */ - MPI a; /* First coefficient of the Weierstrass equation. */ - MPI b; /* Second coefficient of the Weierstrass equation. */ - MPI_POINT G; /* Base point (generator). */ - MPI n; /* Order of G. */ - unsigned int h; /* Cofactor. */ - - /* The actual key. May not be set. */ - MPI_POINT Q; /* Public key. */ - MPI d; /* Private key. */ - - const char *name; /* Name of the curve. 
*/ - - /* This structure is private to mpi/ec.c! */ - struct { - struct { - unsigned int a_is_pminus3:1; - unsigned int two_inv_p:1; - } valid; /* Flags to help setting the helper vars below. */ - - int a_is_pminus3; /* True if A = P - 3. */ - - MPI two_inv_p; - - mpi_barrett_t p_barrett; - - /* Scratch variables. */ - MPI scratch[11]; - - /* Helper for fast reduction. */ - /* int nist_nbits; /\* If this is a NIST curve, the # of bits. *\/ */ - /* MPI s[10]; */ - /* MPI c; */ - } t; - - /* Curve specific computation routines for the field. */ - void (*addm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); - void (*subm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ec); - void (*mulm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); - void (*pow2)(MPI w, const MPI b, struct mpi_ec_ctx *ctx); - void (*mul2)(MPI w, MPI u, struct mpi_ec_ctx *ctx); -}; - -void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, - enum ecc_dialects dialect, - int flags, MPI p, MPI a, MPI b); -void mpi_ec_deinit(struct mpi_ec_ctx *ctx); -MPI_POINT mpi_point_new(unsigned int nbits); -void mpi_point_release(MPI_POINT p); -void mpi_point_init(MPI_POINT p); -void mpi_point_free_parts(MPI_POINT p); -int mpi_ec_get_affine(MPI x, MPI y, MPI_POINT point, struct mpi_ec_ctx *ctx); -void mpi_ec_add_points(MPI_POINT result, - MPI_POINT p1, MPI_POINT p2, - struct mpi_ec_ctx *ctx); -void mpi_ec_mul_point(MPI_POINT result, - MPI scalar, MPI_POINT point, - struct mpi_ec_ctx *ctx); -int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx); - /* inline functions */ /** -- cgit v1.2.3 From 72c0f57dbe8bf625108dc67e34f3472f28501776 Mon Sep 17 00:00:00 2001 From: Norman Bintang Date: Fri, 9 Aug 2024 22:06:45 +0800 Subject: ALSA: pcm: Add xrun counter for snd_pcm_substream This patch adds an xrun counter to snd_pcm_substream as an alternative to using logs from XRUN_DEBUG_BASIC. The counter provides a way to track the number of xrun occurences, accessible through the /proc interface. The counter is enabled when CONFIG_SND_PCM_XRUN_DEBUG is set. Example output: $ cat /proc/asound/card0/pcm9p/sub0/status owner_pid : 1425 trigger_time: 235.248957291 tstamp : 0.000000000 delay : 1912 avail : 480 avail_max : 1920 ----- hw_ptr : 672000 appl_ptr : 673440 xrun_counter: 3 # (new row) Signed-off-by: Norman Bintang Reviewed-by: Chih-Yang Hsia Tested-by: Chih-Yang Hsia Reviewed-by: David Riley Link: https://patch.msgid.link/20240809140648.3414349-1-normanbt@chromium.org Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index ac8f3aef9205..384032b6c59c 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -498,6 +498,9 @@ struct snd_pcm_substream { /* misc flags */ unsigned int hw_opened: 1; unsigned int managed_buffer_alloc:1; +#ifdef CONFIG_SND_PCM_XRUN_DEBUG + unsigned int xrun_counter; /* number of times xrun happens */ +#endif /* CONFIG_SND_PCM_XRUN_DEBUG */ }; #define SUBSTREAM_BUSY(substream) ((substream)->ref_count > 0) -- cgit v1.2.3 From 0737158aabc278526c784f63b8dd8963cdc558a9 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 30 Jul 2024 10:46:31 +0200 Subject: iio: add read scale and offset services to iio backend framework Add iio_backend_read_scale() and iio_backend_read_offset() services to read channel scale and offset from an IIO backbend device. 
Also add a read_raw callback which replicates the read_raw callback of the IIO framework, and is intended to request miscellaneous channel attributes from the backend device. Both scale and offset helpers use this callback. Signed-off-by: Olivier Moysan Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20240730084640.1307938-2-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 2b9d1aa86552..838092b0de3d 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -3,6 +3,7 @@ #define _IIO_BACKEND_H_ #include +#include struct iio_chan_spec; struct fwnode_handle; @@ -85,6 +86,7 @@ enum iio_backend_sample_trigger { * @extend_chan_spec: Extend an IIO channel. * @ext_info_set: Extended info setter. * @ext_info_get: Extended info getter. + * @read_raw: Read a channel attribute from a backend device * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. **/ @@ -119,6 +121,9 @@ struct iio_backend_ops { const char *buf, size_t len); int (*ext_info_get)(struct iio_backend *back, uintptr_t private, const struct iio_chan_spec *chan, char *buf); + int (*read_raw)(struct iio_backend *back, + struct iio_chan_spec const *chan, int *val, int *val2, + long mask); int (*debugfs_print_chan_status)(struct iio_backend *back, unsigned int chan, char *buf, size_t len); @@ -162,7 +167,9 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, const char *buf, size_t len); ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf); - +int iio_backend_read_raw(struct iio_backend *back, + struct iio_chan_spec const *chan, int *val, int *val2, + long mask); int iio_backend_extend_chan_spec(struct iio_backend *back, struct iio_chan_spec *chan); void *iio_backend_get_priv(const struct iio_backend *conv); @@ -174,6 +181,21 @@ __devm_iio_backend_get_from_fwnode_lookup(struct device *dev, int devm_iio_backend_register(struct device *dev, const struct iio_backend_info *info, void *priv); +static inline int iio_backend_read_scale(struct iio_backend *back, + struct iio_chan_spec const *chan, + int *val, int *val2) +{ + return iio_backend_read_raw(back, chan, val, val2, IIO_CHAN_INFO_SCALE); +} + +static inline int iio_backend_read_offset(struct iio_backend *back, + struct iio_chan_spec const *chan, + int *val, int *val2) +{ + return iio_backend_read_raw(back, chan, val, val2, + IIO_CHAN_INFO_OFFSET); +} + ssize_t iio_backend_debugfs_print_chan_status(struct iio_backend *back, unsigned int chan, char *buf, size_t len); -- cgit v1.2.3 From 2530d7d44ca6dc0c1c059f143cdcb2be8600c59a Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 30 Jul 2024 10:46:32 +0200 Subject: iio: add enable and disable services to iio backend framework Add iio_backend_disable() and iio_backend_enable() APIs to allow IIO backend consumer to request backend disabling and enabling. 
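As a rough sketch of how a backend consumer ties these helpers together (not part of either patch; the my_adc names and state layout are invented, and the backend handle is assumed to come from devm_iio_backend_get()):

struct my_adc_state {
        struct iio_backend *back;
};

static int my_adc_read_raw(struct iio_dev *indio_dev,
                           struct iio_chan_spec const *chan,
                           int *val, int *val2, long mask)
{
        struct my_adc_state *st = iio_priv(indio_dev);

        switch (mask) {
        case IIO_CHAN_INFO_SCALE:
                /* pass through the backend's IIO_VAL_* return code */
                return iio_backend_read_scale(st->back, chan, val, val2);
        case IIO_CHAN_INFO_OFFSET:
                return iio_backend_read_offset(st->back, chan, val, val2);
        default:
                return -EINVAL;
        }
}

The non-devm iio_backend_enable()/iio_backend_disable() pair then lets such a consumer power the backend up and down at runtime instead of keeping it enabled for the whole device lifetime.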
Signed-off-by: Olivier Moysan Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20240730084640.1307938-3-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 838092b0de3d..b0b663163ff4 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -144,6 +144,8 @@ struct iio_backend_info { int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan); int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan); int devm_iio_backend_enable(struct device *dev, struct iio_backend *back); +int iio_backend_enable(struct iio_backend *back); +void iio_backend_disable(struct iio_backend *back); int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan, const struct iio_backend_data_fmt *data); int iio_backend_data_source_set(struct iio_backend *back, unsigned int chan, -- cgit v1.2.3 From c464cc610f51f6eb5f27bf905d4c1ab1896b2725 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 30 Jul 2024 10:46:33 +0200 Subject: iio: add child nodes support in iio backend framework Add an API to support IIO generic channels binding: http://devicetree.org/schemas/iio/adc/adc.yaml# This new API is needed, as a generic channel DT node isn't populated as a device. Add devm_iio_backend_fwnode_get() to allow an IIO device backend consumer to reference backend phandles in its child nodes. Signed-off-by: Olivier Moysan Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20240730084640.1307938-4-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index b0b663163ff4..37d56914d485 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -176,6 +176,9 @@ int iio_backend_extend_chan_spec(struct iio_backend *back, struct iio_chan_spec *chan); void *iio_backend_get_priv(const struct iio_backend *conv); struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name); +struct iio_backend *devm_iio_backend_fwnode_get(struct device *dev, + const char *name, + struct fwnode_handle *fwnode); struct iio_backend * __devm_iio_backend_get_from_fwnode_lookup(struct device *dev, struct fwnode_handle *fwnode); -- cgit v1.2.3 From 2837efdc7cef5b86b0c4d94a7410bc03cc2f003f Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Wed, 7 Aug 2024 20:56:18 +0200 Subject: iio: trigger: allow devices to suspend/resume their associated trigger When a machine enters a sleep state while a trigger is associated with an iio device, that trigger is not resumed after the sleep state is exited. Provide iio device drivers with a way to suspend and resume the associated trigger to fix this. Each iio driver supporting external triggers is expected to call iio_device_suspend_triggering() before suspending and iio_device_resume_triggering() upon resuming.
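A minimal sketch of the expected wiring in a driver's PM callbacks (the my_sensor names are invented, and it is assumed the driver stored its iio_dev pointer as driver data):

static int my_sensor_suspend(struct device *dev)
{
        struct iio_dev *indio_dev = dev_get_drvdata(dev);

        return iio_device_suspend_triggering(indio_dev);
}

static int my_sensor_resume(struct device *dev)
{
        struct iio_dev *indio_dev = dev_get_drvdata(dev);

        return iio_device_resume_triggering(indio_dev);
}

static DEFINE_SIMPLE_DEV_PM_OPS(my_sensor_pm_ops, my_sensor_suspend,
                                my_sensor_resume);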
Signed-off-by: Denis Benato Link: https://patch.msgid.link/20240807185619.7261-2-benato.denis96@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 3a735a9a9eae..18779b631e90 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -810,6 +810,23 @@ static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev) } #endif +/** + * iio_device_suspend_triggering() - suspend trigger attached to an iio_dev + * @indio_dev: iio_dev associated with the device that will have triggers suspended + * + * Return 0 if successful, negative otherwise + **/ +int iio_device_suspend_triggering(struct iio_dev *indio_dev); + +/** + * iio_device_resume_triggering() - resume trigger attached to an iio_dev + * that was previously suspended with iio_device_suspend_triggering() + * @indio_dev: iio_dev associated with the device that will have triggers resumed + * + * Return 0 if successful, negative otherwise + **/ +int iio_device_resume_triggering(struct iio_dev *indio_dev); + #ifdef CONFIG_ACPI bool iio_read_acpi_mount_matrix(struct device *dev, struct iio_mount_matrix *orientation, -- cgit v1.2.3 From bf66471987b4bd537bc800353ca7d39bfd1d1022 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 10:51:10 +0200 Subject: context_tracking, rcu: Rename struct context_tracking .dynticks_nesting into .nesting The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. [ neeraj.upadhyay: Fix htmldocs build error reported by Stephen Rothwell ] Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 6 +++--- include/trace/events/rcu.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index ad6570ffeff3..65290e7677e6 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -39,7 +39,7 @@ struct context_tracking { atomic_t state; #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE - long dynticks_nesting; /* Track process nesting level. */ + long nesting; /* Track process nesting level. */ long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ #endif }; @@ -77,14 +77,14 @@ static __always_inline int ct_rcu_watching_cpu_acquire(int cpu) static __always_inline long ct_dynticks_nesting(void) { - return __this_cpu_read(context_tracking.dynticks_nesting); + return __this_cpu_read(context_tracking.nesting); } static __always_inline long ct_dynticks_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return ct->dynticks_nesting; + return ct->nesting; } static __always_inline long ct_dynticks_nmi_nesting(void) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 31b3e0d3e65f..4066b6d51e46 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -469,7 +469,7 @@ TRACE_EVENT(rcu_stall_warning, * polarity: "Start", "End", "StillNonIdle" for entering, exiting or still not * being in dyntick-idle mode. * context: "USER" or "IDLE" or "IRQ". - * NMIs nested in IRQs are inferred with dynticks_nesting > 1 in IRQ context. + * NMIs nested in IRQs are inferred with nesting > 1 in IRQ context. 
* * These events also take a pair of numbers, which indicate the nesting * depth before and after the event of interest, and a third number that is -- cgit v1.2.3 From 1089c0078b69bce0c2d540e3cc43470ba9e5dcfd Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:45:30 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_nesting() into ct_nesting() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 65290e7677e6..586c1ff22c2e 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -75,7 +75,7 @@ static __always_inline int ct_rcu_watching_cpu_acquire(int cpu) return atomic_read_acquire(&ct->state) & CT_RCU_WATCHING_MASK; } -static __always_inline long ct_dynticks_nesting(void) +static __always_inline long ct_nesting(void) { return __this_cpu_read(context_tracking.nesting); } -- cgit v1.2.3 From bca9455da531a3c2520c28ff349d0dab4e471d28 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:48:26 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_nesting_cpu() into ct_nesting_cpu() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 586c1ff22c2e..fd42d8120ac2 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -80,7 +80,7 @@ static __always_inline long ct_nesting(void) return __this_cpu_read(context_tracking.nesting); } -static __always_inline long ct_dynticks_nesting_cpu(int cpu) +static __always_inline long ct_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); -- cgit v1.2.3 From dc5fface4b30508933b515a4985401c8c8f6bcfd Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 10:52:03 +0200 Subject: context_tracking, rcu: Rename struct context_tracking .dynticks_nmi_nesting into .nmi_nesting The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. [ neeraj.upadhyay: Fix htmldocs build error reported by Stephen Rothwell ] Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index fd42d8120ac2..12d00adf29e1 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -40,7 +40,7 @@ struct context_tracking { #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE long nesting; /* Track process nesting level. 
*/ - long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ + long nmi_nesting; /* Track irq/NMI nesting level. */ #endif }; @@ -89,14 +89,14 @@ static __always_inline long ct_nesting_cpu(int cpu) static __always_inline long ct_dynticks_nmi_nesting(void) { - return __this_cpu_read(context_tracking.dynticks_nmi_nesting); + return __this_cpu_read(context_tracking.nmi_nesting); } static __always_inline long ct_dynticks_nmi_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return ct->dynticks_nmi_nesting; + return ct->nmi_nesting; } #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ -- cgit v1.2.3 From 8375cb260d7e43610934af63748d1a51201449f8 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:46:14 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_nmi_nesting() into ct_nmi_nesting() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 12d00adf29e1..8f32fe599c5c 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -87,7 +87,7 @@ static __always_inline long ct_nesting_cpu(int cpu) return ct->nesting; } -static __always_inline long ct_dynticks_nmi_nesting(void) +static __always_inline long ct_nmi_nesting(void) { return __this_cpu_read(context_tracking.nmi_nesting); } -- cgit v1.2.3 From 2ef2890b7a94fbbfa8fdf0a90499ae1df476b8e8 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:49:13 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_nmi_nesting_cpu() into ct_nmi_nesting_cpu() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 8f32fe599c5c..34fd504e53a8 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -92,7 +92,7 @@ static __always_inline long ct_nmi_nesting(void) return __this_cpu_read(context_tracking.nmi_nesting); } -static __always_inline long ct_dynticks_nmi_nesting_cpu(int cpu) +static __always_inline long ct_nmi_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); -- cgit v1.2.3 From e1de438336222dbe27f2d4baa8c01074fcac2bef Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 29 Apr 2024 15:52:32 +0200 Subject: context_tracking, rcu: Rename DYNTICK_IRQ_NONIDLE into CT_NESTING_IRQ_NONIDLE The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. 
Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 34fd504e53a8..0dbda59c9f37 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -7,7 +7,7 @@ #include /* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ -#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) +#define CT_NESTING_IRQ_NONIDLE ((LONG_MAX / 2) + 1) enum ctx_state { CT_STATE_DISABLED = -1, /* returned by ct_state() if unknown */ -- cgit v1.2.3 From 682358fd35dece838e6ae2d9d6a69fc0b9a9d411 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:13 +0300 Subject: RDMA/umem: Add support for creating pinned DMABUF umem with a given dma device Add support for creating pinned DMABUF umem with a specified DMA device instead of the DMA device of the given IB device. This API will be utilized in the upcoming patches of the series when multiple path DMAs are implemented. Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/038aad36a43797e5591b20ba81051fc5758124f9.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_umem.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 565a85044541..de05268ed632 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -150,6 +150,11 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset, size_t size, int fd, int access); +struct ib_umem_dmabuf * +ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, + struct device *dma_device, + unsigned long offset, size_t size, + int fd, int access); int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); @@ -196,6 +201,16 @@ ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset, { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct ib_umem_dmabuf * +ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, + struct device *dma_device, + unsigned long offset, size_t size, + int fd, int access) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) { return -EOPNOTSUPP; -- cgit v1.2.3 From 253c61dc256b3e6be65657f78b4a8452163ce00f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:14 +0300 Subject: RDMA/umem: Introduce an option to revoke DMABUF umem Introduce an option to revoke DMABUF umem. This option will retain the umem allocation while revoking its DMA mapping. Furthermore, any subsequent attempts to map the pages should fail once the umem has been revoked. This functionality will be utilized in the upcoming patches in the series, where we aim to delay umem deallocation until the mkey deregistration. However, we must unmap its pages immediately. 
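A rough sketch of the intended two-stage teardown (the my_mr structure and function names are hypothetical; the real mlx5 flow added later in this series differs in detail):

/* On the revoke event: unmap the pages now, keep the umem allocated. */
static void my_revoke_mr(struct my_mr *mr)
{
        ib_umem_dmabuf_revoke(to_ib_umem_dmabuf(mr->umem));
}

/* At final MR deregistration: free the umem itself. */
static void my_destroy_mr(struct my_mr *mr)
{
        ib_umem_dmabuf_release(to_ib_umem_dmabuf(mr->umem));
}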
Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/a38270f2fe4a194868ca2312f4c1c760e51bcbff.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_umem.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index de05268ed632..7dc7b1cc71b5 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -38,6 +38,7 @@ struct ib_umem_dmabuf { unsigned long last_sg_trim; void *private; u8 pinned : 1; + u8 revoked : 1; }; static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) @@ -158,6 +159,7 @@ ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf); #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -217,6 +219,7 @@ static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) } static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { } static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { } +static inline void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf) {} #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ -- cgit v1.2.3 From 3aa73c6b795b9aaaf933f3c95495d85fc0de39e3 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:15 +0300 Subject: RDMA: Pass uverbs_attr_bundle as part of '.reg_user_mr_dmabuf' API Pass uverbs_attr_bundle as part of '.reg_user_mr_dmabuf' API instead of udata. This enables passing some new ioctl attributes to the drivers, as will be introduced in the next patches for mlx5 driver. Change the involved drivers accordingly. Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/9a25b2fc02443f7c36c2d93499ae25252b6afd40.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6c5712ae559d..a1dcf812d787 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2476,7 +2476,7 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset, u64 length, u64 virt_addr, int fd, int mr_access_flags, - struct ib_udata *udata); + struct uverbs_attr_bundle *attrs); struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, -- cgit v1.2.3 From de8f847a5114ff7cfcdfc114af8485c431dec703 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:16 +0300 Subject: RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. 
The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 4 ++++ include/uapi/rdma/mlx5_user_ioctl_verbs.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 5b74d6534899..106276a4cce7 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -274,6 +274,10 @@ enum mlx5_ib_create_cq_attrs { MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW, }; +enum mlx5_ib_reg_dmabuf_mr_attrs { + MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS = (1U << UVERBS_ID_NS_SHIFT), +}; + #define MLX5_IB_DW_MATCH_PARAM 0xA0 struct mlx5_ib_match_params { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 3189c7f08d17..7c233df475e7 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -54,6 +54,10 @@ enum mlx5_ib_uapi_flow_action_packet_reformat_type { MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, }; +enum mlx5_ib_uapi_reg_dmabuf_flags { + MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT = 1 << 0, +}; + struct mlx5_ib_uapi_devx_async_cmd_hdr { __aligned_u64 wr_id; __u8 out_data[]; -- cgit v1.2.3 From ec7ad6530909983c8736c80af46e3529ce7bab55 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:17 +0300 Subject: RDMA/mlx5: Introduce GET_DATA_DIRECT_SYSFS_PATH ioctl Introduce the 'GET_DATA_DIRECT_SYSFS_PATH' ioctl to return the sysfs path of the affiliated 'data direct' device for a given device. 
Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/403745463e0ef52adbef681ff09aa6a29a756352.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 106276a4cce7..fd2e4a3a56b3 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -348,6 +348,7 @@ enum mlx5_ib_pd_methods { enum mlx5_ib_device_methods { MLX5_IB_METHOD_QUERY_PORT = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH, }; enum mlx5_ib_query_port_attrs { @@ -355,4 +356,8 @@ enum mlx5_ib_query_port_attrs { MLX5_IB_ATTR_QUERY_PORT, }; +enum mlx5_ib_get_data_direct_sysfs_path_attrs { + MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH = (1U << UVERBS_ID_NS_SHIFT), +}; + #endif -- cgit v1.2.3 From ccb41c445a3e9506ef43fe33867a356048e41477 Mon Sep 17 00:00:00 2001 From: Kwanghoon Son Date: Fri, 9 Aug 2024 20:54:12 +0900 Subject: dt-bindings: clock: exynosautov9: add dpum clock Add dpum clock definitions and compatibles. Also used clock name 'bus' instead of full clock name dout_clkcmu_dpum_bus like other board cmu schema (GS101). Signed-off-by: Kwanghoon Son Link: https://lore.kernel.org/r/20240809-clk_dpum-v3-1-359decc30fe2@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynosautov9.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynosautov9.h b/include/dt-bindings/clock/samsung,exynosautov9.h index 3065375c2d8b..ce8fb8f7d718 100644 --- a/include/dt-bindings/clock/samsung,exynosautov9.h +++ b/include/dt-bindings/clock/samsung,exynosautov9.h @@ -179,6 +179,17 @@ #define CLK_GOUT_CORE_CCI_PCLK 4 #define CLK_GOUT_CORE_CMU_CORE_PCLK 5 +/* CMU_DPUM */ +#define CLK_MOUT_DPUM_BUS_USER 1 +#define CLK_DOUT_DPUM_BUSP 2 +#define CLK_GOUT_DPUM_ACLK_DECON 3 +#define CLK_GOUT_DPUM_ACLK_DMA 4 +#define CLK_GOUT_DPUM_ACLK_DPP 5 +#define CLK_GOUT_DPUM_SYSMMU_D0_CLK 6 +#define CLK_GOUT_DPUM_SYSMMU_D1_CLK 7 +#define CLK_GOUT_DPUM_SYSMMU_D2_CLK 8 +#define CLK_GOUT_DPUM_SYSMMU_D3_CLK 9 + /* CMU_FSYS0 */ #define CLK_MOUT_FSYS0_BUS_USER 1 #define CLK_MOUT_FSYS0_PCIE_USER 2 -- cgit v1.2.3 From aa9fbc5dd9da9e095a8b1a58540cf20cb06f595f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 8 Aug 2024 12:41:17 +0100 Subject: net: mii: constify advertising mask Constify the advertising mask to linkmode functions that only read from the advertising mask. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/mii.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index d5a959ce4877..b8f26d4513c3 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -140,7 +140,7 @@ static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv) * settings to phy autonegotiation advertisements for the * MII_ADVERTISE register. */ -static inline u32 linkmode_adv_to_mii_adv_t(unsigned long *advertising) +static inline u32 linkmode_adv_to_mii_adv_t(const unsigned long *advertising) { u32 result = 0; @@ -215,7 +215,8 @@ static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv) * settings to phy autonegotiation advertisements for the * MII_CTRL1000 register when in 1000T mode. 
*/ -static inline u32 linkmode_adv_to_mii_ctrl1000_t(unsigned long *advertising) +static inline u32 +linkmode_adv_to_mii_ctrl1000_t(const unsigned long *advertising) { u32 result = 0; @@ -453,7 +454,7 @@ static inline void mii_ctrl1000_mod_linkmode_adv_t(unsigned long *advertising, * A small helper function that translates linkmode advertising to LVL * pause capabilities. */ -static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) +static inline u32 linkmode_adv_to_lcl_adv_t(const unsigned long *advertising) { u32 lcl_adv = 0; -- cgit v1.2.3 From d0f8a8973f265f6a276f99d091af99edfb2b87de Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 8 Aug 2024 00:14:13 +0000 Subject: mm/memblock: introduce a new helper memblock_estimated_nr_free_pages() During bootup, system may need the number of free pages in the whole system to do some calculation before all pages are freed to buddy system. Usually this number is get from totalram_pages(). Since we plan to move the free pages accounting in __free_pages_core(), this value may not represent total free pages at the early stage, especially when CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled. Instead of using raw memblock api, let's introduce a new helper for user to get the estimated number of free pages from memblock point of view. Signed-off-by: Wei Yang CC: David Hildenbrand Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20240808001415.6298-1-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fc4d75c6cec3..673d5cae7c81 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -467,6 +467,7 @@ static inline __init_memblock bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); +unsigned long memblock_estimated_nr_free_pages(void); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); -- cgit v1.2.3 From d27a14060f8501e556a65b346b2644be0d0a2de8 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Wed, 7 Aug 2024 15:36:12 +0200 Subject: drm/panic: Move drm_panic_register prototype to drm_crtc_internal.h drm_panic_[un]register() are only used by the core drm, and are not intended to be called by other drm drivers, so move their prototypes to drm_crtc_internal.h. 
Suggested-by: Daniel Vetter Signed-off-by: Jocelyn Falempe Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240807134902.458669-4-jfalempe@redhat.com --- include/drm/drm_panic.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index 73bb3f3d9ed9..a4bd3681920d 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -146,16 +146,4 @@ struct drm_scanout_buffer { #define drm_panic_unlock(dev, flags) \ raw_spin_unlock_irqrestore(&(dev)->mode_config.panic_lock, flags) -#ifdef CONFIG_DRM_PANIC - -void drm_panic_register(struct drm_device *dev); -void drm_panic_unregister(struct drm_device *dev); - -#else - -static inline void drm_panic_register(struct drm_device *dev) {} -static inline void drm_panic_unregister(struct drm_device *dev) {} - -#endif - #endif /* __DRM_PANIC_H__ */ -- cgit v1.2.3 From 969135862e731620b9e03bb0c21179ff1cccfd0e Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Wed, 7 Aug 2024 15:36:13 +0200 Subject: drm/panic: Move copyright notice to the top Move the copyright notice to the top of drm_panic.h, and add the missing Red Hat copyright notice. Suggested-by: Thomas Zimmermann Signed-off-by: Jocelyn Falempe Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240807134902.458669-5-jfalempe@redhat.com --- include/drm/drm_panic.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index a4bd3681920d..54085d5d05c3 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -1,4 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 or MIT */ + +/* + * Copyright (c) 2024 Intel + * Copyright (c) 2024 Red Hat + */ + #ifndef __DRM_PANIC_H__ #define __DRM_PANIC_H__ @@ -8,9 +14,6 @@ #include #include -/* - * Copyright (c) 2024 Intel - */ /** * struct drm_scanout_buffer - DRM scanout buffer -- cgit v1.2.3 From 0dc4fb69eb14320ea0fcd9657b7748eec201ccaa Mon Sep 17 00:00:00 2001 From: Mohammed Anees Date: Sun, 11 Aug 2024 06:16:51 -0400 Subject: drm: Add missing documentation for struct drm_plane_size_hint This patch takes care of the following warnings during documentation compiling: ./include/uapi/drm/drm_mode.h:869: warning: Function parameter or struct member 'width' not described in 'drm_plane_size_hint' ./include/uapi/drm/drm_mode.h:869: warning: Function parameter or struct member 'height' not described in 'drm_plane_size_hint' Signed-off-by: Mohammed Anees Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240811101653.170223-1-pvmohammedanees2003@gmail.com --- include/uapi/drm/drm_mode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index d390011b89b4..c082810c08a8 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -859,6 +859,8 @@ struct drm_color_lut { /** * struct drm_plane_size_hint - Plane size hints + * @width: The width of the plane in pixel + * @height: The height of the plane in pixel * * The plane SIZE_HINTS property blob contains an * array of struct drm_plane_size_hint. 
-- cgit v1.2.3 From 8c38617722bdf57a90e6c77ed9ee5ebb60958d2a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 9 Aug 2024 16:15:54 +0200 Subject: memory: ti-aemif: remove platform data support There are no longer any users of the ti-aemif driver that set up platform data from board files. We can shrink the driver by removing support for it. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240809-ti-aemif-v1-1-27b1e5001390@linaro.org Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/ti-aemif.h | 45 ---------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 include/linux/platform_data/ti-aemif.h (limited to 'include') diff --git a/include/linux/platform_data/ti-aemif.h b/include/linux/platform_data/ti-aemif.h deleted file mode 100644 index 77625251df07..000000000000 --- a/include/linux/platform_data/ti-aemif.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * TI DaVinci AEMIF platform glue. - * - * Copyright (C) 2017 BayLibre SAS - * - * Author: - * Bartosz Golaszewski - */ - -#ifndef __TI_DAVINCI_AEMIF_DATA_H__ -#define __TI_DAVINCI_AEMIF_DATA_H__ - -#include - -/** - * struct aemif_abus_data - Async bus configuration parameters. - * - * @cs - Chip-select number. - */ -struct aemif_abus_data { - u32 cs; -}; - -/** - * struct aemif_platform_data - Data to set up the TI aemif driver. - * - * @dev_lookup: of_dev_auxdata passed to of_platform_populate() for aemif - * subdevices. - * @cs_offset: Lowest allowed chip-select number. - * @abus_data: Array of async bus configuration entries. - * @num_abus_data: Number of abus entries. - * @sub_devices: Array of platform subdevices. - * @num_sub_devices: Number of subdevices. - */ -struct aemif_platform_data { - struct of_dev_auxdata *dev_lookup; - u32 cs_offset; - struct aemif_abus_data *abus_data; - size_t num_abus_data; - struct platform_device *sub_devices; - size_t num_sub_devices; -}; - -#endif /* __TI_DAVINCI_AEMIF_DATA_H__ */ -- cgit v1.2.3 From e9d05e9d5db155dcc708891611f1b6f977c3fa11 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Thu, 8 Aug 2024 22:40:54 +0200 Subject: media: uapi: rkisp1-config: Add extensible params format Add to the rkisp1-config.h header data types and documentation of the extensible parameters format. 
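To make the serialized buffer layout concrete, a minimal userspace sketch that packs a single Black Level Subtraction block into the new extensible parameters buffer, based on the structures added below; the black-level values are placeholders and error handling is omitted:

  #include <string.h>
  #include <linux/rkisp1-config.h>

  static void fill_ext_params(struct rkisp1_ext_params_cfg *cfg)
  {
          struct rkisp1_ext_params_bls_config bls = {
                  .header = {
                          .type  = RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS,
                          .flags = RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE,
                          .size  = sizeof(bls),
                  },
                  .config = {
                          .enable_auto = 0,
                          /* Placeholder black levels. */
                          .fixed_val = { .r = 256, .gr = 256, .gb = 256, .b = 256 },
                  },
          };

          cfg->version = RKISP1_EXT_PARAM_BUFFER_V1;
          cfg->data_size = sizeof(bls);
          memcpy(cfg->data, &bls, sizeof(bls));
  }

Further blocks would be appended to cfg->data in the same way, with cfg->data_size grown accordingly.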
Signed-off-by: Jacopo Mondi Reviewed-by: Laurent Pinchart Reviewed-by: Paul Elder Tested-by: Kieran Bingham Acked-by: Sakari Ailus Signed-off-by: Laurent Pinchart --- include/uapi/linux/rkisp1-config.h | 491 +++++++++++++++++++++++++++++++++++++ 1 file changed, 491 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 6eeaf8bf2362..9767bb19c72d 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -996,4 +996,495 @@ struct rkisp1_stat_buffer { struct rkisp1_cif_isp_stat params; }; +/*---------- PART3: Extensible Configuration Parameters ------------*/ + +/** + * enum rkisp1_ext_params_block_type - RkISP1 extensible params block type + * + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS: Black level subtraction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_DPCC: Defect pixel cluster correction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_SDG: Sensor de-gamma + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_GAIN: Auto white balance gains + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_FLT: ISP filtering + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_BDM: Bayer de-mosaic + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_CTK: Cross-talk correction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_GOC: Gamma out correction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF: De-noise pre-filter + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF_STRENGTH: De-noise pre-filter strength + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_CPROC: Color processing + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_IE: Image effects + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_LSC: Lens shading correction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_MEAS: Auto white balance statistics + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS: Histogram statistics + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS: Auto exposure statistics + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS: Auto-focus statistics + */ +enum rkisp1_ext_params_block_type { + RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_DPCC, + RKISP1_EXT_PARAMS_BLOCK_TYPE_SDG, + RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_GAIN, + RKISP1_EXT_PARAMS_BLOCK_TYPE_FLT, + RKISP1_EXT_PARAMS_BLOCK_TYPE_BDM, + RKISP1_EXT_PARAMS_BLOCK_TYPE_CTK, + RKISP1_EXT_PARAMS_BLOCK_TYPE_GOC, + RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF, + RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF_STRENGTH, + RKISP1_EXT_PARAMS_BLOCK_TYPE_CPROC, + RKISP1_EXT_PARAMS_BLOCK_TYPE_IE, + RKISP1_EXT_PARAMS_BLOCK_TYPE_LSC, + RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_MEAS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS, +}; + +#define RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE (1U << 0) +#define RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE (1U << 1) + +/** + * struct rkisp1_ext_params_block_header - RkISP1 extensible parameters block + * header + * + * This structure represents the common part of all the ISP configuration + * blocks. Each parameters block shall embed an instance of this structure type + * as its first member, followed by the block-specific configuration data. The + * driver inspects this common header to discern the block type and its size and + * properly handle the block content by casting it to the correct block-specific + * type. + * + * The @type field is one of the values enumerated by + * :c:type:`rkisp1_ext_params_block_type` and specifies how the data should be + * interpreted by the driver. The @size field specifies the size of the + * parameters block and is used by the driver for validation purposes. + * + * The @flags field is a bitmask of per-block flags RKISP1_EXT_PARAMS_FL_*. 
+ * + * When userspace wants to configure and enable an ISP block it shall fully + * populate the block configuration and set the + * RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE bit in the @flags field. + * + * When userspace simply wants to disable an ISP block the + * RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE bit should be set in @flags field. The + * driver ignores the rest of the block configuration structure in this case. + * + * If a new configuration of an ISP block has to be applied userspace shall + * fully populate the ISP block configuration and omit setting the + * RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE and RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE bits + * in the @flags field. + * + * Setting both the RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE and + * RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE bits in the @flags field is not allowed + * and not accepted by the driver. + * + * Userspace is responsible for correctly populating the parameters block header + * fields (@type, @flags and @size) and the block-specific parameters. + * + * For example: + * + * .. code-block:: c + * + * void populate_bls(struct rkisp1_ext_params_block_header *block) { + * struct rkisp1_ext_params_bls_config *bls = + * (struct rkisp1_ext_params_bls_config *)block; + * + * bls->header.type = RKISP1_EXT_PARAMS_BLOCK_ID_BLS; + * bls->header.flags = RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE; + * bls->header.size = sizeof(*bls); + * + * bls->config.enable_auto = 0; + * bls->config.fixed_val.r = blackLevelRed_; + * bls->config.fixed_val.gr = blackLevelGreenR_; + * bls->config.fixed_val.gb = blackLevelGreenB_; + * bls->config.fixed_val.b = blackLevelBlue_; + * } + * + * @type: The parameters block type, see + * :c:type:`rkisp1_ext_params_block_type` + * @flags: A bitmask of block flags + * @size: Size (in bytes) of the parameters block, including this header + */ +struct rkisp1_ext_params_block_header { + __u16 type; + __u16 flags; + __u32 size; +}; + +/** + * struct rkisp1_ext_params_bls_config - RkISP1 extensible params BLS config + * + * RkISP1 extensible parameters Black Level Subtraction configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Black Level Subtraction configuration, see + * :c:type:`rkisp1_cif_isp_bls_config` + */ +struct rkisp1_ext_params_bls_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_bls_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_dpcc_config - RkISP1 extensible params DPCC config + * + * RkISP1 extensible parameters Defective Pixel Cluster Correction configuration + * block. Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_DPCC`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Defective Pixel Cluster Correction configuration, see + * :c:type:`rkisp1_cif_isp_dpcc_config` + */ +struct rkisp1_ext_params_dpcc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_dpcc_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_sdg_config - RkISP1 extensible params SDG config + * + * RkISP1 extensible parameters Sensor Degamma configuration block. Identified + * by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_SDG`. 
+ * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Sensor Degamma configuration, see + * :c:type:`rkisp1_cif_isp_sdg_config` + */ +struct rkisp1_ext_params_sdg_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_sdg_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_lsc_config - RkISP1 extensible params LSC config + * + * RkISP1 extensible parameters Lens Shading Correction configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_LSC`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Lens Shading Correction configuration, see + * :c:type:`rkisp1_cif_isp_lsc_config` + */ +struct rkisp1_ext_params_lsc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_lsc_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_awb_gain_config - RkISP1 extensible params AWB + * gain config + * + * RkISP1 extensible parameters Auto-White Balance Gains configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_GAIN`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Auto-White Balance Gains configuration, see + * :c:type:`rkisp1_cif_isp_awb_gain_config` + */ +struct rkisp1_ext_params_awb_gain_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_awb_gain_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_flt_config - RkISP1 extensible params FLT config + * + * RkISP1 extensible parameters Filter configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_FLT`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Filter configuration, see :c:type:`rkisp1_cif_isp_flt_config` + */ +struct rkisp1_ext_params_flt_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_flt_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_bdm_config - RkISP1 extensible params BDM config + * + * RkISP1 extensible parameters Demosaicing configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_BDM`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Demosaicing configuration, see :c:type:`rkisp1_cif_isp_bdm_config` + */ +struct rkisp1_ext_params_bdm_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_bdm_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_ctk_config - RkISP1 extensible params CTK config + * + * RkISP1 extensible parameters Cross-Talk configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_CTK`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Cross-Talk configuration, see :c:type:`rkisp1_cif_isp_ctk_config` + */ +struct rkisp1_ext_params_ctk_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_ctk_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_goc_config - RkISP1 extensible params GOC config + * + * RkISP1 extensible parameters Gamma-Out configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_GOC`. 
+ * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Gamma-Out configuration, see :c:type:`rkisp1_cif_isp_goc_config` + */ +struct rkisp1_ext_params_goc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_goc_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_dpf_config - RkISP1 extensible params DPF config + * + * RkISP1 extensible parameters De-noise Pre-Filter configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: De-noise Pre-Filter configuration, see + * :c:type:`rkisp1_cif_isp_dpf_config` + */ +struct rkisp1_ext_params_dpf_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_dpf_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_dpf_strength_config - RkISP1 extensible params DPF + * strength config + * + * RkISP1 extensible parameters De-noise Pre-Filter strength configuration + * block. Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF_STRENGTH`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: De-noise Pre-Filter strength configuration, see + * :c:type:`rkisp1_cif_isp_dpf_strength_config` + */ +struct rkisp1_ext_params_dpf_strength_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_dpf_strength_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_cproc_config - RkISP1 extensible params CPROC config + * + * RkISP1 extensible parameters Color Processing configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_CPROC`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Color processing configuration, see + * :c:type:`rkisp1_cif_isp_cproc_config` + */ +struct rkisp1_ext_params_cproc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_cproc_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_ie_config - RkISP1 extensible params IE config + * + * RkISP1 extensible parameters Image Effect configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_IE`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Image Effect configuration, see :c:type:`rkisp1_cif_isp_ie_config` + */ +struct rkisp1_ext_params_ie_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_ie_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_awb_meas_config - RkISP1 extensible params AWB + * Meas config + * + * RkISP1 extensible parameters Auto-White Balance Measurement configuration + * block. Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_MEAS`. 
+ * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Auto-White Balance measure configuration, see + * :c:type:`rkisp1_cif_isp_awb_meas_config` + */ +struct rkisp1_ext_params_awb_meas_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_awb_meas_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_hst_config - RkISP1 extensible params Histogram config + * + * RkISP1 extensible parameters Histogram statistics configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Histogram statistics configuration, see + * :c:type:`rkisp1_cif_isp_hst_config` + */ +struct rkisp1_ext_params_hst_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_hst_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_aec_config - RkISP1 extensible params AEC config + * + * RkISP1 extensible parameters Auto-Exposure statistics configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Auto-Exposure statistics configuration, see + * :c:type:`rkisp1_cif_isp_aec_config` + */ +struct rkisp1_ext_params_aec_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_aec_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_afc_config - RkISP1 extensible params AFC config + * + * RkISP1 extensible parameters Auto-Focus statistics configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS`. 
+ * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Auto-Focus statistics configuration, see + * :c:type:`rkisp1_cif_isp_afc_config` + */ +struct rkisp1_ext_params_afc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_afc_config config; +} __attribute__((aligned(8))); + +#define RKISP1_EXT_PARAMS_MAX_SIZE \ + (sizeof(struct rkisp1_ext_params_bls_config) +\ + sizeof(struct rkisp1_ext_params_dpcc_config) +\ + sizeof(struct rkisp1_ext_params_sdg_config) +\ + sizeof(struct rkisp1_ext_params_lsc_config) +\ + sizeof(struct rkisp1_ext_params_awb_gain_config) +\ + sizeof(struct rkisp1_ext_params_flt_config) +\ + sizeof(struct rkisp1_ext_params_bdm_config) +\ + sizeof(struct rkisp1_ext_params_ctk_config) +\ + sizeof(struct rkisp1_ext_params_goc_config) +\ + sizeof(struct rkisp1_ext_params_dpf_config) +\ + sizeof(struct rkisp1_ext_params_dpf_strength_config) +\ + sizeof(struct rkisp1_ext_params_cproc_config) +\ + sizeof(struct rkisp1_ext_params_ie_config) +\ + sizeof(struct rkisp1_ext_params_awb_meas_config) +\ + sizeof(struct rkisp1_ext_params_hst_config) +\ + sizeof(struct rkisp1_ext_params_aec_config) +\ + sizeof(struct rkisp1_ext_params_afc_config)) + +/** + * enum rksip1_ext_param_buffer_version - RkISP1 extensible parameters version + * + * @RKISP1_EXT_PARAM_BUFFER_V1: First version of RkISP1 extensible parameters + */ +enum rksip1_ext_param_buffer_version { + RKISP1_EXT_PARAM_BUFFER_V1 = 1, +}; + +/** + * struct rkisp1_ext_params_cfg - RkISP1 extensible parameters configuration + * + * This struct contains the configuration parameters of the RkISP1 ISP + * algorithms, serialized by userspace into a data buffer. Each configuration + * parameter block is represented by a block-specific structure which contains a + * :c:type:`rkisp1_ext_params_block_header` entry as first member. Userspace + * populates the @data buffer with configuration parameters for the blocks that + * it intends to configure. As a consequence, the data buffer effective size + * changes according to the number of ISP blocks that userspace intends to + * configure and is set by userspace in the @data_size field. + * + * The parameters buffer is versioned by the @version field to allow modifying + * and extending its definition. Userspace shall populate the @version field to + * inform the driver about the version it intends to use. The driver will parse + * and handle the @data buffer according to the data layout specific to the + * indicated version and return an error if the desired version is not + * supported. + * + * Currently the single RKISP1_EXT_PARAM_BUFFER_V1 version is supported. + * When a new format version will be added, a mechanism for userspace to query + * the supported format versions will be implemented in the form of a read-only + * V4L2 control. If such control is not available, userspace should assume only + * RKISP1_EXT_PARAM_BUFFER_V1 is supported by the driver. + * + * For each ISP block that userspace wants to configure, a block-specific + * structure is appended to the @data buffer, one after the other without gaps + * in between nor overlaps. Userspace shall populate the @data_size field with + * the effective size, in bytes, of the @data buffer. 
+ * + * The expected memory layout of the parameters buffer is:: + * + * +-------------------- struct rkisp1_ext_params_cfg -------------------+ + * | version = RKISP_EXT_PARAMS_BUFFER_V1; | + * | data_size = sizeof(struct rkisp1_ext_params_bls_config) | + * | + sizeof(struct rkisp1_ext_params_dpcc_config); | + * | +------------------------- data ---------------------------------+ | + * | | +------------- struct rkisp1_ext_params_bls_config -----------+ | | + * | | | +-------- struct rkisp1_ext_params_block_header ---------+ | | | + * | | | | type = RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS; | | | | + * | | | | flags = RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE; | | | | + * | | | | size = sizeof(struct rkisp1_ext_params_bls_config); | | | | + * | | | +---------------------------------------------------------+ | | | + * | | | +---------- struct rkisp1_cif_isp_bls_config -------------+ | | | + * | | | | enable_auto = 0; | | | | + * | | | | fixed_val.r = 256; | | | | + * | | | | fixed_val.gr = 256; | | | | + * | | | | fixed_val.gb = 256; | | | | + * | | | | fixed_val.b = 256; | | | | + * | | | +---------------------------------------------------------+ | | | + * | | +------------ struct rkisp1_ext_params_dpcc_config -----------+ | | + * | | | +-------- struct rkisp1_ext_params_block_header ---------+ | | | + * | | | | type = RKISP1_EXT_PARAMS_BLOCK_TYPE_DPCC; | | | | + * | | | | flags = RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE; | | | | + * | | | | size = sizeof(struct rkisp1_ext_params_dpcc_config); | | | | + * | | | +---------------------------------------------------------+ | | | + * | | | +---------- struct rkisp1_cif_isp_dpcc_config ------------+ | | | + * | | | | mode = RKISP1_CIF_ISP_DPCC_MODE_STAGE1_ENABLE; | | | | + * | | | | output_mode = | | | | + * | | | | RKISP1_CIF_ISP_DPCC_OUTPUT_MODE_STAGE1_INCL_G_CENTER; | | | | + * | | | | set_use = ... ; | | | | + * | | | | ... = ... ; | | | | + * | | | +---------------------------------------------------------+ | | | + * | | +-------------------------------------------------------------+ | | + * | +-----------------------------------------------------------------+ | + * +---------------------------------------------------------------------+ + * + * @version: The RkISP1 extensible parameters buffer version, see + * :c:type:`rksip1_ext_param_buffer_version` + * @data_size: The RkISP1 configuration data effective size, excluding this + * header + * @data: The RkISP1 extensible configuration data blocks + */ +struct rkisp1_ext_params_cfg { + __u32 version; + __u32 data_size; + __u8 data[RKISP1_EXT_PARAMS_MAX_SIZE]; +}; + #endif /* _UAPI_RKISP1_CONFIG_H */ -- cgit v1.2.3 From 1fc379f6241b331207c4573c4ff43526fe404301 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Thu, 8 Aug 2024 22:40:55 +0200 Subject: media: uapi: videodev2: Add V4L2_META_FMT_RK_ISP1_EXT_PARAMS The rkisp1 driver stores ISP configuration parameters in the fixed rkisp1_params_cfg structure. As the members of the structure are part of the userspace API, the structure layout is immutable and cannot be extended further. Introducing new parameters or modifying the existing ones would change the buffer layout and cause breakages in existing applications. The allow for future extensions to the ISP parameters, introduce a new extensible parameters format, with a new format 4CC. Document usage of the new format in the rkisp1 admin guide. 
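As a hedged usage sketch for the new format: selecting it on the rkisp1 parameters video node with VIDIOC_S_FMT; the device node path passed by the caller is an assumption and error handling is kept minimal:

  #include <fcntl.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <linux/videodev2.h>

  static int set_ext_params_format(const char *devnode)
  {
          struct v4l2_format fmt;
          int fd, ret;

          fd = open(devnode, O_RDWR);  /* e.g. the rkisp1 params node */
          if (fd < 0)
                  return -1;

          memset(&fmt, 0, sizeof(fmt));
          fmt.type = V4L2_BUF_TYPE_META_OUTPUT;
          fmt.fmt.meta.dataformat = V4L2_META_FMT_RK_ISP1_EXT_PARAMS;
          /* fmt.fmt.meta.buffersize is filled in by the driver on return. */

          ret = ioctl(fd, VIDIOC_S_FMT, &fmt);
          close(fd);
          return ret;
  }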
Signed-off-by: Jacopo Mondi Reviewed-by: Daniel Scally Reviewed-by: Paul Elder Reviewed-by: Laurent Pinchart Tested-by: Kieran Bingham Acked-by: Sakari Ailus Signed-off-by: Laurent Pinchart --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 4e91362da6da..725e86c4bbbd 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -854,6 +854,7 @@ struct v4l2_pix_format { /* Vendor specific - used for RK_ISP1 camera sub-system */ #define V4L2_META_FMT_RK_ISP1_PARAMS v4l2_fourcc('R', 'K', '1', 'P') /* Rockchip ISP1 3A Parameters */ #define V4L2_META_FMT_RK_ISP1_STAT_3A v4l2_fourcc('R', 'K', '1', 'S') /* Rockchip ISP1 3A Statistics */ +#define V4L2_META_FMT_RK_ISP1_EXT_PARAMS v4l2_fourcc('R', 'K', '1', 'E') /* Rockchip ISP1 3a Extensible Parameters */ /* Vendor specific - used for RaspberryPi PiSP */ #define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */ -- cgit v1.2.3 From ce056504e2e53b22c1f789cff2ad6a0a135dc24d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 Aug 2024 22:37:20 -0700 Subject: ethtool: make ethtool_ops::cap_rss_ctx_supported optional cap_rss_ctx_supported was created because the API for creating and configuring additional contexts is mux'ed with the normal RSS API. Presence of ops does not imply driver can actually support rss_context != 0 (in fact drivers mostly ignore that field). cap_rss_ctx_supported lets core check that the driver is context-aware before calling it. Now that we have .create_rxfh_context, there is no such ambiguity. We can depend on presence of the op. Make setting the bit optional. Reviewed-by: Gal Pressman Reviewed-by: Edward Cree Reviewed-by: Joe Damato Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 989c94eddb2b..a149485904d8 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -727,7 +727,8 @@ struct kernel_ethtool_ts_info { * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. * @cap_rss_ctx_supported: indicates if the driver supports RSS - * contexts. + * contexts via legacy API, drivers implementing @create_rxfh_context + * do not have to set this bit. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. * @rxfh_indir_space: max size of RSS indirection tables, if indirection table -- cgit v1.2.3 From ec6e57beaf8bc64ea0c2dc0cc360afcc7f504425 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 Aug 2024 22:37:22 -0700 Subject: ethtool: rss: don't report key if device doesn't support it marvell/otx2 and mvpp2 do not support setting different keys for different RSS contexts. Contexts have separate indirection tables but key is shared with all other contexts. This is likely fine, indirection table is the most important piece. Don't report the key-related parameters from such drivers. This prevents driver-errors, e.g. otx2 always writes the main key, even when user asks to change per-context key. The second reason is that without this change tracking the keys by the core gets complicated. Even if the driver correctly reject setting key with rss_context != 0, change of the main key would have to be reflected in the XArray for all additional contexts. 
Since the additional contexts don't have their own keys not including the attributes (in Netlink speak) seems intuitive. ethtool CLI seems to deal with it just fine. Having to set the flag in majority of the drivers is a bit tedious but not reporting the key is a safer default. Reviewed-by: Edward Cree Reviewed-by: Joe Damato Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a149485904d8..12f6dc567598 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -731,6 +731,9 @@ struct kernel_ethtool_ts_info { * do not have to set this bit. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_per_ctx_key: device supports setting different RSS key for each + * additional context. Netlink API should report hfunc, key, and input_xfrm + * for every context, not just context 0. * @rxfh_indir_space: max size of RSS indirection tables, if indirection table * size as returned by @get_rxfh_indir_size may change during lifetime * of the device. Leave as 0 if the table size is constant. @@ -952,6 +955,7 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u32 rxfh_per_ctx_key:1; u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; -- cgit v1.2.3 From 3d50c66c0609c8b64fb22e9c188fca88f34e7c98 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 Aug 2024 22:37:26 -0700 Subject: ethtool: rss: support skipping contexts during dump Applications may want to deal with dynamic RSS contexts only. So dumping context 0 will be counter-productive for them. Support starting the dump from a given context ID. Alternative would be to implement a dump flag to skip just context 0, not sure which is better... Reviewed-by: Edward Cree Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 6d5bdcc67631..93c57525a975 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -965,6 +965,7 @@ enum { ETHTOOL_A_RSS_INDIR, /* binary */ ETHTOOL_A_RSS_HKEY, /* binary */ ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ + ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ __ETHTOOL_A_RSS_CNT, ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), -- cgit v1.2.3 From fc9aef4382c02774662da3d7e1de8ba224e04f80 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Thu, 25 Jul 2024 08:23:40 -0400 Subject: platform/x86/intel/vsec.h: Move to include/linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some drivers outside of PDX86 need access to the vsec header. Move it to include/linux to make it easier to include. Reviewed-by: Ilpo Järvinen Reviewed-by: Michael J. Ruhl Signed-off-by: David E. 
Box Link: https://lore.kernel.org/r/20240725122346.4063913-2-michael.j.ruhl@intel.com Signed-off-by: Hans de Goede --- include/linux/intel_vsec.h | 134 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 include/linux/intel_vsec.h (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h new file mode 100644 index 000000000000..6495e37c9079 --- /dev/null +++ b/include/linux/intel_vsec.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_VSEC_H +#define _INTEL_VSEC_H + +#include +#include + +#define VSEC_CAP_TELEMETRY BIT(0) +#define VSEC_CAP_WATCHER BIT(1) +#define VSEC_CAP_CRASHLOG BIT(2) +#define VSEC_CAP_SDSI BIT(3) +#define VSEC_CAP_TPMI BIT(4) + +/* Intel DVSEC offsets */ +#define INTEL_DVSEC_ENTRIES 0xA +#define INTEL_DVSEC_SIZE 0xB +#define INTEL_DVSEC_TABLE 0xC +#define INTEL_DVSEC_TABLE_BAR(x) ((x) & GENMASK(2, 0)) +#define INTEL_DVSEC_TABLE_OFFSET(x) ((x) & GENMASK(31, 3)) +#define TABLE_OFFSET_SHIFT 3 + +struct pci_dev; +struct resource; + +enum intel_vsec_id { + VSEC_ID_TELEMETRY = 2, + VSEC_ID_WATCHER = 3, + VSEC_ID_CRASHLOG = 4, + VSEC_ID_SDSI = 65, + VSEC_ID_TPMI = 66, +}; + +/** + * struct intel_vsec_header - Common fields of Intel VSEC and DVSEC registers. + * @rev: Revision ID of the VSEC/DVSEC register space + * @length: Length of the VSEC/DVSEC register space + * @id: ID of the feature + * @num_entries: Number of instances of the feature + * @entry_size: Size of the discovery table for each feature + * @tbir: BAR containing the discovery tables + * @offset: BAR offset of start of the first discovery table + */ +struct intel_vsec_header { + u8 rev; + u16 length; + u16 id; + u8 num_entries; + u8 entry_size; + u8 tbir; + u32 offset; +}; + +enum intel_vsec_quirks { + /* Watcher feature not supported */ + VSEC_QUIRK_NO_WATCHER = BIT(0), + + /* Crashlog feature not supported */ + VSEC_QUIRK_NO_CRASHLOG = BIT(1), + + /* Use shift instead of mask to read discovery table offset */ + VSEC_QUIRK_TABLE_SHIFT = BIT(2), + + /* DVSEC not present (provided in driver data) */ + VSEC_QUIRK_NO_DVSEC = BIT(3), + + /* Platforms requiring quirk in the auxiliary driver */ + VSEC_QUIRK_EARLY_HW = BIT(4), +}; + +/** + * struct intel_vsec_platform_info - Platform specific data + * @parent: parent device in the auxbus chain + * @headers: list of headers to define the PMT client devices to create + * @caps: bitmask of PMT capabilities for the given headers + * @quirks: bitmask of VSEC device quirks + * @base_addr: allow a base address to be specified (rather than derived) + */ +struct intel_vsec_platform_info { + struct device *parent; + struct intel_vsec_header **headers; + unsigned long caps; + unsigned long quirks; + u64 base_addr; +}; + +/** + * struct intel_sec_device - Auxbus specific device information + * @auxdev: auxbus device struct for auxbus access + * @pcidev: pci device associated with the device + * @resource: any resources shared by the parent + * @ida: id reference + * @num_resources: number of resources + * @id: xarray id + * @priv_data: any private data needed + * @quirks: specified quirks + * @base_addr: base address of entries (if specified) + */ +struct intel_vsec_device { + struct auxiliary_device auxdev; + struct pci_dev *pcidev; + struct resource *resource; + struct ida *ida; + int num_resources; + int id; /* xa */ + void *priv_data; + size_t priv_data_size; + unsigned long quirks; + u64 base_addr; +}; + +int intel_vsec_add_aux(struct pci_dev *pdev, struct device 
*parent, + struct intel_vsec_device *intel_vsec_dev, + const char *name); + +static inline struct intel_vsec_device *dev_to_ivdev(struct device *dev) +{ + return container_of(dev, struct intel_vsec_device, auxdev.dev); +} + +static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device *auxdev) +{ + return container_of(auxdev, struct intel_vsec_device, auxdev); +} + +#if IS_ENABLED(CONFIG_INTEL_VSEC) +void intel_vsec_register(struct pci_dev *pdev, + struct intel_vsec_platform_info *info); +#else +static inline void intel_vsec_register(struct pci_dev *pdev, + struct intel_vsec_platform_info *info) +{ +} +#endif +#endif -- cgit v1.2.3 From e92affc74cd8624a548b380af7364be037adef35 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Thu, 25 Jul 2024 08:23:41 -0400 Subject: platform/x86/intel/vsec: Add PMT read callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some PMT providers require device specific actions before their telemetry can be read. Provide assignable PMT read callbacks to allow providers to perform those actions. Reviewed-by: Ilpo Järvinen Reviewed-by: Michael J. Ruhl Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20240725122346.4063913-3-michael.j.ruhl@intel.com Signed-off-by: Hans de Goede --- include/linux/intel_vsec.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index 6495e37c9079..11ee185566c3 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -67,10 +67,24 @@ enum intel_vsec_quirks { VSEC_QUIRK_EARLY_HW = BIT(4), }; +/** + * struct pmt_callbacks - Callback infrastructure for PMT devices + * ->read_telem() when specified, called by client driver to access PMT data (instead + * of direct copy). + * @pdev: PCI device reference for the callback's use + * @guid: ID of data to acccss + * @data: buffer for the data to be copied + * @count: size of buffer + */ +struct pmt_callbacks { + int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, u32 count); +}; + /** * struct intel_vsec_platform_info - Platform specific data * @parent: parent device in the auxbus chain * @headers: list of headers to define the PMT client devices to create + * @priv_data: private data, usable by parent devices, currently a callback * @caps: bitmask of PMT capabilities for the given headers * @quirks: bitmask of VSEC device quirks * @base_addr: allow a base address to be specified (rather than derived) @@ -78,6 +92,7 @@ enum intel_vsec_quirks { struct intel_vsec_platform_info { struct device *parent; struct intel_vsec_header **headers; + void *priv_data; unsigned long caps; unsigned long quirks; u64 base_addr; -- cgit v1.2.3 From 61b74964536e86445d43acff5cff6ad907ba9321 Mon Sep 17 00:00:00 2001 From: Jithu Joseph Date: Thu, 1 Aug 2024 05:18:14 +0000 Subject: trace: platform/x86/intel/ifs: Add SBAF trace support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracing support for the SBAF IFS tests, which may be useful for debugging systems that fail these tests. Log details like test content batch number, SBAF bundle ID, program index and the exact errors or warnings encountered by each HT thread during the test. 
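For orientation, a hedged sketch of the driver-side call provided by the generated tracepoint; the surrounding helper and the way the activation/status unions are obtained are assumptions, only trace_ifs_sbaf() itself follows from the TRACE_EVENT below:

  #include <trace/events/intel_ifs.h>

  /* union ifs_sbaf / ifs_sbaf_status are assumed to come from the driver's
   * private header. */
  static void ex_report_sbaf_result(int batch, union ifs_sbaf activate,
                                    union ifs_sbaf_status status)
  {
          /* Logs batch, bundle_idx, pgm_idx and status per the TP_printk format. */
          trace_ifs_sbaf(batch, activate, status);
  }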
Reviewed-by: Ashok Raj Reviewed-by: Tony Luck Reviewed-by: Ilpo Järvinen Reviewed-by: Steven Rostedt (Google) Signed-off-by: Jithu Joseph Signed-off-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/20240801051814.1935149-5-sathyanarayanan.kuppuswamy@linux.intel.com Signed-off-by: Hans de Goede --- include/trace/events/intel_ifs.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/trace/events/intel_ifs.h b/include/trace/events/intel_ifs.h index 0d88ebf2c980..70323acde1de 100644 --- a/include/trace/events/intel_ifs.h +++ b/include/trace/events/intel_ifs.h @@ -35,6 +35,33 @@ TRACE_EVENT(ifs_status, __entry->status) ); +TRACE_EVENT(ifs_sbaf, + + TP_PROTO(int batch, union ifs_sbaf activate, union ifs_sbaf_status status), + + TP_ARGS(batch, activate, status), + + TP_STRUCT__entry( + __field( u64, status ) + __field( int, batch ) + __field( u16, bundle ) + __field( u16, pgm ) + ), + + TP_fast_assign( + __entry->status = status.data; + __entry->batch = batch; + __entry->bundle = activate.bundle_idx; + __entry->pgm = activate.pgm_idx; + ), + + TP_printk("batch: 0x%.2x, bundle_idx: 0x%.4x, pgm_idx: 0x%.4x, status: 0x%.16llx", + __entry->batch, + __entry->bundle, + __entry->pgm, + __entry->status) +); + #endif /* _TRACE_IFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 29bc83e4d90546aa794a9584786086b141a6ba4d Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Mon, 15 Jul 2024 16:23:24 -0700 Subject: srcu: faster gp seq wrap-around Using a higher value for the initial gp sequence counters allows for wrapping to occur faster. It can help with surfacing any issues that may be happening as a result of the wrap around. Signed-off-by: JP Kobryn Tested-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcupdate.h | 3 +++ include/linux/srcutree.h | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13f6f00aecf9..8d56db70d417 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -34,6 +34,9 @@ #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +#define RCU_SEQ_CTR_SHIFT 2 +#define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1) + /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 8f3f72480e78..ed57598394de 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -129,10 +129,23 @@ struct srcu_struct { #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 +/* + * Values for initializing gp sequence fields. Higher values allow wrap arounds to + * occur earlier. + * The second value with state is useful in the case of static initialization of + * srcu_usage where srcu_gp_seq_needed is expected to have some state value in its + * lower bits (or else it will appear to be already initialized within + * the call check_init_srcu_struct()). 
+ */ +#define SRCU_GP_SEQ_INITIAL_VAL ((0UL - 100UL) << RCU_SEQ_CTR_SHIFT) +#define SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE (SRCU_GP_SEQ_INITIAL_VAL - 1) + #define __SRCU_USAGE_INIT(name) \ { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ - .srcu_gp_seq_needed = -1UL, \ + .srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL, \ + .srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE, \ + .srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL, \ .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ } -- cgit v1.2.3 From 711f5c5ce6c2c640c1b3b569ab2a8847be5ab21f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 15 Jul 2024 21:22:51 -0400 Subject: lsm: cleanup lsm_hooks.h Some cleanup and style corrections for lsm_hooks.h. * Drop the lsm_inode_alloc() extern declaration, it is not needed. * Relocate lsm_get_xattr_slot() and extern variables in the file to improve grouping of related objects. * Don't use tabs to needlessly align structure fields. Reviewed-by: Casey Schaufler Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 87 +++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index f1ca8082075a..11ea0063228f 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -51,8 +51,8 @@ struct security_hook_heads { * Contains the information that identifies the LSM. */ struct lsm_id { - const char *name; - u64 id; + const char *name; + u64 id; }; /* @@ -60,49 +60,31 @@ struct lsm_id { * For use with generic list macros for common operations. */ struct security_hook_list { - struct hlist_node list; - struct hlist_head *head; - union security_list_options hook; - const struct lsm_id *lsmid; + struct hlist_node list; + struct hlist_head *head; + union security_list_options hook; + const struct lsm_id *lsmid; } __randomize_layout; /* * Security blob size or offset data. */ struct lsm_blob_sizes { - int lbs_cred; - int lbs_file; - int lbs_ib; - int lbs_inode; - int lbs_sock; - int lbs_superblock; - int lbs_ipc; - int lbs_key; - int lbs_msg_msg; - int lbs_perf_event; - int lbs_task; - int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ - int lbs_tun_dev; + int lbs_cred; + int lbs_file; + int lbs_ib; + int lbs_inode; + int lbs_sock; + int lbs_superblock; + int lbs_ipc; + int lbs_key; + int lbs_msg_msg; + int lbs_perf_event; + int lbs_task; + int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ + int lbs_tun_dev; }; -/** - * lsm_get_xattr_slot - Return the next available slot and increment the index - * @xattrs: array storing LSM-provided xattrs - * @xattr_count: number of already stored xattrs (updated) - * - * Retrieve the first available slot in the @xattrs array to fill with an xattr, - * and increment @xattr_count. - * - * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise. - */ -static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, - int *xattr_count) -{ - if (unlikely(!xattrs)) - return NULL; - return &xattrs[(*xattr_count)++]; -} - /* * LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void * LSM hooks (in include/linux/lsm_hook_defs.h). 
@@ -118,9 +100,6 @@ static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, #define LSM_HOOK_INIT(HEAD, HOOK) \ { .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } } -extern struct security_hook_heads security_hook_heads; -extern char *lsm_names; - extern void security_add_hooks(struct security_hook_list *hooks, int count, const struct lsm_id *lsmid); @@ -142,9 +121,6 @@ struct lsm_info { struct lsm_blob_sizes *blobs; /* Optional: for blob sharing. */ }; -extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; -extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; - #define DEFINE_LSM(lsm) \ static struct lsm_info __lsm_##lsm \ __used __section(".lsm_info.init") \ @@ -155,6 +131,29 @@ extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; __used __section(".early_lsm_info.init") \ __aligned(sizeof(unsigned long)) -extern int lsm_inode_alloc(struct inode *inode); +/* DO NOT tamper with these variables outside of the LSM framework */ +extern char *lsm_names; +extern struct security_hook_heads security_hook_heads; +extern struct lsm_static_calls_table static_calls_table __ro_after_init; +extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; +extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; + +/** + * lsm_get_xattr_slot - Return the next available slot and increment the index + * @xattrs: array storing LSM-provided xattrs + * @xattr_count: number of already stored xattrs (updated) + * + * Retrieve the first available slot in the @xattrs array to fill with an xattr, + * and increment @xattr_count. + * + * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise. + */ +static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, + int *xattr_count) +{ + if (unlikely(!xattrs)) + return NULL; + return &xattrs[(*xattr_count)++]; +} #endif /* ! __LINUX_LSM_HOOKS_H */ -- cgit v1.2.3 From 63dff3e48871b0583be5032ff8fb7260c349a18c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 9 Jul 2024 19:43:06 -0400 Subject: lsm: add the inode_free_security_rcu() LSM implementation hook The LSM framework has an existing inode_free_security() hook which is used by LSMs that manage state associated with an inode, but due to the use of RCU to protect the inode, special care must be taken to ensure that the LSMs do not fully release the inode state until it is safe from a RCU perspective. This patch implements a new inode_free_security_rcu() implementation hook which is called when it is safe to free the LSM's internal inode state. Unfortunately, this new hook does not have access to the inode itself as it may already be released, so the existing inode_free_security() hook is retained for those LSMs which require access to the inode. 
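A hedged sketch of how an LSM might split inode teardown across the existing hook and the new RCU-deferred one; the "ex_"/"example_" names and the ex_inode_sec structure are illustrative, not taken from any in-tree LSM:

  #include <linux/fs.h>
  #include <linux/list.h>
  #include <linux/lsm_hooks.h>
  #include <linux/slab.h>

  struct ex_inode_sec {
          struct list_head list;
          char *label;
  };

  static struct lsm_blob_sizes ex_blob_sizes __ro_after_init = {
          .lbs_inode = sizeof(struct ex_inode_sec),
  };

  static struct ex_inode_sec *ex_inode(const struct inode *inode)
  {
          return inode->i_security + ex_blob_sizes.lbs_inode;
  }

  /* The inode is still valid here; do the work that needs it. */
  static void example_inode_free_security(struct inode *inode)
  {
          list_del_init(&ex_inode(inode)->list);
  }

  /* Called once an RCU grace period has passed; only the blob is passed in. */
  static void example_inode_free_security_rcu(void *inode_security)
  {
          struct ex_inode_sec *isec = inode_security;

          kfree(isec->label);
  }

  static struct security_hook_list example_hooks[] __ro_after_init = {
          LSM_HOOK_INIT(inode_free_security, example_inode_free_security),
          LSM_HOOK_INIT(inode_free_security_rcu, example_inode_free_security_rcu),
  };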
Cc: stable@vger.kernel.org Reported-by: syzbot+5446fbf332b0602ede0b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/00000000000076ba3b0617f65cc8@google.com Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 63e2656d1d56..520730fe2d94 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -114,6 +114,7 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask, unsigned int obj_type) LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) +LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security) LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count) -- cgit v1.2.3 From 42b0f8da3acc87953161baeb24f756936eb4d4b2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 31 Jul 2024 07:47:27 +0200 Subject: nsfs: fix ioctl declaration The kernel is writing an object of type __u64, so the ioctl has to be defined to _IOR(NSIO, 0x5, __u64) instead of _IO(NSIO, 0x5). Reported-by: Dmitry V. Levin Link: https://lore.kernel.org/r/20240730164554.GA18486@altlinux.org Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index b133211331f6..5fad3d0fcd70 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -3,6 +3,7 @@ #define __LINUX_NSFS_H #include +#include #define NSIO 0xb7 @@ -16,7 +17,7 @@ /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) /* Get the id for a mount namespace */ -#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) /* Translate pid from target pid namespace into the caller's pid namespace. */ #define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) /* Return thread-group leader id of pid in the callers pid namespace. */ -- cgit v1.2.3 From 86509e38a80da34d7800985fa2be183475242c8c Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 9 Aug 2024 15:50:35 +0200 Subject: file: fix typo in take_fd() comment The explanatory comment above take_fd() contains a typo, fix that to not confuse readers. Signed-off-by: Mathias Krause Link: https://lore.kernel.org/r/20240809135035.748109-1-minipli@grsecurity.net Signed-off-by: Christian Brauner --- include/linux/file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739..59b146a14dca 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -110,7 +110,7 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), * * f = dentry_open(&path, O_RDONLY, current_cred()); * if (IS_ERR(f)) - * return PTR_ERR(fd); + * return PTR_ERR(f); * * fd_install(fd, f); * return take_fd(fd); -- cgit v1.2.3 From 8e5ced7804cb9184c4a23f8054551240562a8eda Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2024 17:01:40 +0100 Subject: netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag" This reverts commit ae678317b95e760607c7b20b97c9cd4ca9ed6e1a. 
Revert the patch that removes the deprecated use of PG_private_2 in netfslib for the moment as Ceph is actually still using this to track data copied to the cache. Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag") Reported-by: Max Kellermann Signed-off-by: David Howells cc: Ilya Dryomov cc: Xiubo Li cc: Jeff Layton cc: Matthew Wilcox cc: ceph-devel@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index da23484268df..24ec3434d32e 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -145,6 +145,7 @@ EM(netfs_folio_trace_clear_g, "clear-g") \ EM(netfs_folio_trace_clear_s, "clear-s") \ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ + EM(netfs_folio_trace_end_copy, "end-copy") \ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ EM(netfs_folio_trace_kill, "kill") \ EM(netfs_folio_trace_kill_cc, "kill-cc") \ -- cgit v1.2.3 From 7b589a9b45ae32aa9d7bece597490e141198d7a6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Aug 2024 19:38:46 +0100 Subject: netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags The NETFS_RREQ_USE_PGPRIV2 and NETFS_RREQ_WRITE_TO_CACHE flags aren't used correctly. The problem is that we try to set them up in the request initialisation, but the cache may still be in the process of being set up, and so the state may not be correct. Further, we secondarily sample the cache state and make contradictory decisions later. The issue arises because we set up the cache resources, which allows the cache's ->prepare_read() to switch on NETFS_SREQ_COPY_TO_CACHE - which triggers cache writing even if we didn't set the flags when allocating. Fix this in the following way: (1) Drop NETFS_ICTX_USE_PGPRIV2 and instead set NETFS_RREQ_USE_PGPRIV2 in ->init_request() rather than trying to juggle that in netfs_alloc_request(). (2) Repurpose NETFS_RREQ_USE_PGPRIV2 to merely indicate that if caching is to be done, then PG_private_2 is to be used rather than only setting it if we decide to cache and then having netfs_rreq_unlock_folios() set the non-PG_private_2 writeback-to-cache if it wasn't set. (3) Split netfs_rreq_unlock_folios() into two functions, one of which contains the deprecated code for using PG_private_2 to avoid accidentally doing the writeback path - and always use it if USE_PGPRIV2 is set. (4) As NETFS_ICTX_USE_PGPRIV2 is removed, make netfs_write_begin() always wait for PG_private_2. This function is deprecated and only used by ceph anyway, and so label it so. (5) Drop the NETFS_RREQ_WRITE_TO_CACHE flag and use fscache_operation_valid() on the cache_resources instead. This has the advantage of picking up the result of netfs_begin_cache_read() and fscache_begin_write_operation() - which are called after the object is initialised and will wait for the cache to come to a usable state. Just reverting ae678317b95e[1] isn't a sufficient fix, so this needs to be applied on top of that. Without this as well, things like: rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: { and: WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386 may happen, along with some UAFs due to PG_private_2 not getting used to wait on writeback completion.
Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio->private and marking dirty") Reported-by: Max Kellermann Signed-off-by: David Howells cc: Ilya Dryomov cc: Xiubo Li cc: Hristo Venev cc: Jeff Layton cc: Matthew Wilcox cc: ceph-devel@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk/ [1] Link: https://lore.kernel.org/r/1173209.1723152682@warthog.procyon.org.uk Signed-off-by: Christian Brauner --- include/linux/netfs.h | 3 --- include/trace/events/netfs.h | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5d0288938cc2..983816608f15 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -73,8 +73,6 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ -#define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark - * write to cache on read */ }; /* @@ -269,7 +267,6 @@ struct netfs_io_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ -#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 24ec3434d32e..606b4a0f92da 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -51,6 +51,7 @@ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_pause, "PAUSE ") \ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ + EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ -- cgit v1.2.3 From fdad456cbcca739bae1849549c7a999857c56f88 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Sun, 28 Jul 2024 19:46:11 +0800 Subject: bpf: Fix updating attached freplace prog in prog_array map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT") fixed a NULL pointer dereference panic, but didn't fix the issue that fails to update attached freplace prog to prog_array map. Since commit 1c123c567fb1 ("bpf: Resolve fext program type when checking map compatibility"), freplace prog and its target prog are able to tail call each other. And the commit 3aac1ead5eb6 ("bpf: Move prog->aux->linked_prog and trampoline into bpf_link on attach") sets prog->aux->dst_prog as NULL after attaching freplace prog to its target prog. After loading freplace the prog_array's owner type is BPF_PROG_TYPE_SCHED_CLS. Then, after attaching freplace its prog->aux->dst_prog is NULL. Then, while updating freplace in prog_array the bpf_prog_map_compatible() incorrectly returns false because resolve_prog_type() returns BPF_PROG_TYPE_EXT instead of BPF_PROG_TYPE_SCHED_CLS. After this patch the resolve_prog_type() returns BPF_PROG_TYPE_SCHED_CLS and update to prog_array can succeed. 
Fixes: f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT") Cc: Toke Høiland-Jørgensen Cc: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20240728114612.48486-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6503c85b10a3..7b776dae36e5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -856,8 +856,8 @@ static inline u32 type_flag(u32 type) /* only use after check_attach_btf_id() */ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { - return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ? - prog->aux->dst_prog->type : prog->type; + return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->saved_dst_prog_type) ? + prog->aux->saved_dst_prog_type : prog->type; } static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) -- cgit v1.2.3 From 246ef40670b71fef0c3e2cd11404279bc6d6468e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 29 Jul 2024 17:30:10 -0700 Subject: ipv6: eliminate ndisc_ops_is_useropt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit as it doesn't seem to offer anything of value. There's only 1 trivial user: int lowpan_ndisc_is_useropt(u8 nd_opt_type) { return nd_opt_type == ND_OPT_6CO; } but there's no harm to always treating that as a useropt... Cc: David Ahern Cc: YOSHIFUJI Hideaki / 吉藤英明 Signed-off-by: Maciej Żenczykowski Link: https://patch.msgid.link/20240730003010.156977-1-maze@google.com Signed-off-by: Jakub Kicinski --- include/net/ndisc.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 7a533d5b1d59..3c88d5bc5eed 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -147,11 +147,6 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, * The following hooks can be defined; unless noted otherwise, they are * optional and can be filled with a null pointer. * - * int (*is_useropt)(u8 nd_opt_type): - * This function is called when IPv6 decide RA userspace options. if - * this function returns 1 then the option given by nd_opt_type will - * be handled as userspace option additional to the IPv6 options. - * * int (*parse_options)(const struct net_device *dev, * struct nd_opt_hdr *nd_opt, * struct ndisc_options *ndopts): @@ -200,7 +195,6 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, * addresses. E.g. 802.15.4 6LoWPAN. 
*/ struct ndisc_ops { - int (*is_useropt)(u8 nd_opt_type); int (*parse_options)(const struct net_device *dev, struct nd_opt_hdr *nd_opt, struct ndisc_options *ndopts); @@ -224,15 +218,6 @@ struct ndisc_ops { }; #if IS_ENABLED(CONFIG_IPV6) -static inline int ndisc_ops_is_useropt(const struct net_device *dev, - u8 nd_opt_type) -{ - if (dev->ndisc_ops && dev->ndisc_ops->is_useropt) - return dev->ndisc_ops->is_useropt(nd_opt_type); - else - return 0; -} - static inline int ndisc_ops_parse_options(const struct net_device *dev, struct nd_opt_hdr *nd_opt, struct ndisc_options *ndopts) -- cgit v1.2.3 From 75bab45e6b2da379fe2ebda48ed35f8ce371a2ef Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 7 Aug 2024 16:13:46 +0200 Subject: net: nexthop: Add flag to assert that NHGRP reserved fields are zero There are many unpatched kernel versions out there that do not initialize the reserved fields of struct nexthop_grp. The issue with that is that if those fields were to be used for some end (i.e. stop being reserved), old kernels would still keep sending random data through the field, and a new userspace could not rely on the value. In this patch, use the existing NHA_OP_FLAGS, which is currently inbound only, to carry flags back to the userspace. Add a flag to indicate that the reserved fields in struct nexthop_grp are zeroed before dumping. This is reliant on the actual fix from commit 6d745cd0e972 ("net: nexthop: Initialize all fields in dumped nexthops"). Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/21037748d4f9d8ff486151f4c09083bcf12d5df8.1723036486.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/nexthop.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index dd8787f9cf39..f4f060a87cc2 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -33,6 +33,9 @@ enum { #define NHA_OP_FLAG_DUMP_STATS BIT(0) #define NHA_OP_FLAG_DUMP_HW_STATS BIT(1) +/* Response OP_FLAGS. */ +#define NHA_OP_FLAG_RESP_GRP_RESVD_0 BIT(31) /* Dump clears resvd fields. */ + enum { NHA_UNSPEC, NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */ -- cgit v1.2.3 From b72a6a7ab9573e06d5c2fcb92eaa28614a735bfd Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 7 Aug 2024 16:13:47 +0200 Subject: net: nexthop: Increase weight to u16 In CLOS networks, as link failures occur at various points in the network, ECMP weights of the involved nodes are adjusted to compensate. With high fan-out of the involved nodes, and overall high number of nodes, a (non-)ECMP weight ratio that we would like to configure does not fit into 8 bits. Instead of, say, 255:254, we might like to configure something like 1000:999. For these deployments, the 8-bit weight may not be enough. To that end, in this patch increase the next hop weight from u8 to u16. Increasing the width of an integral type can be tricky, because while the code still compiles, the types may not check out anymore, and numerical errors come up. To prevent this, the conversion was done in two steps. First the type was changed from u8 to a single-member structure, which invalidated all uses of the field. This allowed going through them one by one and audit for type correctness. Then the structure was replaced with a vanilla u16 again. This should ensure that no place was missed. 
The UAPI for configuring nexthop group members is that an attribute NHA_GROUP carries an array of struct nexthop_grp entries: struct nexthop_grp { __u32 id; /* nexthop id - must exist */ __u8 weight; /* weight of this nexthop */ __u8 resvd1; __u16 resvd2; }; The field resvd1 is currently validated and required to be zero. We can lift this requirement and carry high-order bits of the weight in the reserved field: struct nexthop_grp { __u32 id; /* nexthop id - must exist */ __u8 weight; /* weight of this nexthop */ __u8 weight_high; __u16 resvd2; }; Keeping the fields split this way was chosen in case an existing userspace makes assumptions about the width of the weight field, and to sidestep any endianness issues. The weight field is currently encoded as the weight value minus one, because weight of 0 is invalid. This same trick is impossible for the new weight_high field, because zero must mean actual zero. With this in place: - Old userspace is guaranteed to carry weight_high of 0, therefore configuring 8-bit weights as appropriate. When dumping nexthops with 16-bit weight, it would only show the lower 8 bits. But configuring such nexthops implies existence of userspace aware of the extension in the first place. - New userspace talking to an old kernel will work as long as it only attempts to configure 8-bit weights, where the high-order bits are zero. Old kernel will bounce attempts at configuring >8-bit weights. Renaming reserved fields as they are allocated for some purpose is commonly done in Linux. Whoever touches a reserved field is doing so at their own risk. nexthop_grp::resvd1 in particular is currently used by at least strace, however they carry an own copy of UAPI headers, and the conversion should be trivial. A helper is provided for decoding the weight out of the two fields. Forcing a conversion seems preferable to bending backwards and introducing anonymous unions or whatever. 
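As an illustration (not part of the patch), userspace wanting a 16-bit weight could fill the two fields like this, mirroring the decoding helper added to the UAPI header below; nexthop_grp_set_weight() is a hypothetical name:

	#include <linux/nexthop.h>

	/* hypothetical encoder, the inverse of nexthop_grp_weight() */
	static inline void nexthop_grp_set_weight(struct nexthop_grp *entry,
						  unsigned int weight)
	{
		unsigned int v = weight - 1;	/* weight 0 is invalid */

		entry->weight	   = v & 0xff;	/* low 8 bits */
		entry->weight_high = v >> 8;	/* 0 for weights <= 256 */
	}

For example, a weight of 1000 is stored as weight_high = 3, weight = 231; an old kernel, which still validates that byte as resvd1, bounces such a request, matching the compatibility behaviour described above.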
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/483e2fcf4beb0d9135d62e7d27b46fa2685479d4.1723036486.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 4 ++-- include/uapi/linux/nexthop.h | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 68463aebcc05..d9fb44e8b321 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -105,7 +105,7 @@ struct nh_grp_entry_stats { struct nh_grp_entry { struct nexthop *nh; struct nh_grp_entry_stats __percpu *stats; - u8 weight; + u16 weight; union { struct { @@ -192,7 +192,7 @@ struct nh_notifier_single_info { }; struct nh_notifier_grp_entry_info { - u8 weight; + u16 weight; struct nh_notifier_single_info nh; }; diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index f4f060a87cc2..bc49baf4a267 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -16,10 +16,15 @@ struct nhmsg { struct nexthop_grp { __u32 id; /* nexthop id - must exist */ __u8 weight; /* weight of this nexthop */ - __u8 resvd1; + __u8 weight_high; /* high order bits of weight */ __u16 resvd2; }; +static inline __u16 nexthop_grp_weight(const struct nexthop_grp *entry) +{ + return ((entry->weight_high << 8) | entry->weight) + 1; +} + enum { NEXTHOP_GRP_TYPE_MPATH, /* hash-threshold nexthop group * default type if not specified -- cgit v1.2.3 From 1da91ea87aefe2c25b68c9f96947a9271ba6325d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 May 2024 14:12:01 -0400 Subject: introduce fd_file(), convert all accessors to it. For any changes of struct fd representation we need to turn existing accesses to fields into calls of wrappers. Accesses to struct fd::flags are very few (3 in linux/file.h, 1 in net/socket.c, 3 in fs/overlayfs/file.c and 3 more in explicit initializers). Those can be dealt with in the commit converting to new layout; accesses to struct fd::file are too many for that. This commit converts (almost) all of f.file to fd_file(f). It's not entirely mechanical ('file' is used as a member name more than just in struct fd) and it does not even attempt to distinguish the uses in pointer context from those in boolean context; the latter will be eventually turned into a separate helper (fd_empty()). NOTE: mass conversion to fd_empty(), tempting as it might be, is a bad idea; better do that piecewise in commit that convert from fdget...() to CLASS(...). 
[conflicts in fs/fhandle.c, kernel/bpf/syscall.c, mm/memcontrol.c caught by git; fs/stat.c one got caught by git grep] [fs/xattr.c conflict] Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/cleanup.h | 2 +- include/linux/file.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index d9e613803df1..a3d3e888cf1f 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -98,7 +98,7 @@ const volatile void * __must_check_fn(const volatile void *val) * DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd) * * CLASS(fdget, f)(fd); - * if (!f.file) + * if (!fd_file(f)) * return -EBADF; * * // use 'f' without concern diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739..0f3f369f2450 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -42,10 +42,12 @@ struct fd { #define FDPUT_FPUT 1 #define FDPUT_POS_UNLOCK 2 +#define fd_file(f) ((f).file) + static inline void fdput(struct fd fd) { if (fd.flags & FDPUT_FPUT) - fput(fd.file); + fput(fd_file(fd)); } extern struct file *fget(unsigned int fd); @@ -79,7 +81,7 @@ static inline struct fd fdget_pos(int fd) static inline void fdput_pos(struct fd f) { if (f.flags & FDPUT_POS_UNLOCK) - __f_unlock_pos(f.file); + __f_unlock_pos(fd_file(f)); fdput(f); } -- cgit v1.2.3 From 88a2f6468d013ca1163490dbddfc95135d1c27a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 May 2024 15:45:12 -0400 Subject: struct fd: representation change We want the compiler to see that fdput() on empty instance is a no-op. The emptiness check is that file reference is NULL, while fdput() is "fput() if FDPUT_FPUT is present in flags". The reason why fdput() on empty instance is a no-op is something compiler can't see - it's that we never generate instances with NULL file reference combined with non-zero flags. It's not that hard to deal with - the real primitives behind fdget() et.al. are returning an unsigned long value, unpacked by (inlined) __to_fd() into the current struct file * + int. The lower bits are used to store flags, while the rest encodes the pointer. Linus suggested that keeping this unsigned long around with the extractions done by inlined accessors should generate a sane code and that turns out to be the case. Namely, turning struct fd into a struct-wrapped unsinged long, with fd_empty(f) => unlikely(f.word == 0) fd_file(f) => (struct file *)(f.word & ~3) fdput(f) => if (f.word & 1) fput(fd_file(f)) ends up with compiler doing the right thing. The cost is the patch footprint, of course - we need to switch f.file to fd_file(f) all over the tree, and it's not doable with simple search and replace; there are false positives, etc. Note that the sole member of that structure is an opaque unsigned long - all accesses should be done via wrappers and I don't want to use a name that would invite manual casts to file pointers, etc. The value of that member is equal either to (unsigned long)p | flags, p being an address of some struct file instance, or to 0 for an empty fd. For now the new predicate (fd_empty(f)) has no users; all the existing checks have form (!fd_file(f)). We will convert to fd_empty() use later; here we only define it (and tell the compiler that it's unlikely to return true). This commit only deals with representation change; there will be followups. 
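A minimal, hedged sketch of what a caller looks like once this and the preceding fd_file() conversion are in place (the function name is made up; existing code keeps the !fd_file() form until the later fd_empty() conversion):

	#include <linux/file.h>
	#include <linux/fs.h>

	static int example_fsync(unsigned int fd)	/* hypothetical caller */
	{
		struct fd f = fdget(fd);
		int ret;

		if (!fd_file(f))		/* empty instance: f.word == 0, nothing to put */
			return -EBADF;

		ret = vfs_fsync(fd_file(f), 0);	/* pointer recovered as f.word & ~3 */
		fdput(f);			/* fput() only if FDPUT_FPUT is set in the word */
		return ret;
	}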
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/file.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index 0f3f369f2450..eb28469b1c16 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -35,18 +35,28 @@ static inline void fput_light(struct file *file, int fput_needed) fput(file); } +/* either a reference to struct file + flags + * (cloned vs. borrowed, pos locked), with + * flags stored in lower bits of value, + * or empty (represented by 0). + */ struct fd { - struct file *file; - unsigned int flags; + unsigned long word; }; #define FDPUT_FPUT 1 #define FDPUT_POS_UNLOCK 2 -#define fd_file(f) ((f).file) +#define fd_file(f) ((struct file *)((f).word & ~(FDPUT_FPUT|FDPUT_POS_UNLOCK))) +static inline bool fd_empty(struct fd f) +{ + return unlikely(!f.word); +} + +#define EMPTY_FD (struct fd){0} static inline void fdput(struct fd fd) { - if (fd.flags & FDPUT_FPUT) + if (fd.word & FDPUT_FPUT) fput(fd_file(fd)); } @@ -60,7 +70,7 @@ extern void __f_unlock_pos(struct file *); static inline struct fd __to_fd(unsigned long v) { - return (struct fd){(struct file *)(v & ~3),v & 3}; + return (struct fd){v}; } static inline struct fd fdget(unsigned int fd) @@ -80,7 +90,7 @@ static inline struct fd fdget_pos(int fd) static inline void fdput_pos(struct fd f) { - if (f.flags & FDPUT_POS_UNLOCK) + if (f.word & FDPUT_POS_UNLOCK) __f_unlock_pos(fd_file(f)); fdput(f); } -- cgit v1.2.3 From de12c3391bce10504c0e7bd767516c74110cfce1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 May 2024 16:34:25 -0400 Subject: add struct fd constructors, get rid of __to_fd() Make __fdget() et.al. return struct fd directly. New helpers: BORROWED_FD(file) and CLONED_FD(file), for borrowed and cloned file references resp. NOTE: this might need tuning; in particular, inline on __fget_light() is there to keep the code generation same as before - we probably want to keep it inlined in fdget() et.al. (especially so in fdget_pos()), but that needs profiling. 
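For illustration, a hedged sketch of the contract the two constructors express (fd_ctor_demo() is a made-up name):

	#include <linux/file.h>
	#include <linux/fs.h>

	static void fd_ctor_demo(struct file *file)
	{
		/* borrow: wrap a reference the caller goes on owning */
		struct fd borrowed = BORROWED_FD(file);

		/* clone: wrap a reference taken specifically for this struct fd */
		struct fd cloned = CLONED_FD(get_file(file));

		fdput(borrowed);	/* no-op, FDPUT_FPUT not set */
		fdput(cloned);		/* drops the reference taken above */
	}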
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/file.h | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index eb28469b1c16..aab1caff6713 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -53,6 +53,14 @@ static inline bool fd_empty(struct fd f) } #define EMPTY_FD (struct fd){0} +static inline struct fd BORROWED_FD(struct file *f) +{ + return (struct fd){(unsigned long)f}; +} +static inline struct fd CLONED_FD(struct file *f) +{ + return (struct fd){(unsigned long)f | FDPUT_FPUT}; +} static inline void fdput(struct fd fd) { @@ -63,30 +71,11 @@ static inline void fdput(struct fd fd) extern struct file *fget(unsigned int fd); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_task(struct task_struct *task, unsigned int fd); -extern unsigned long __fdget(unsigned int fd); -extern unsigned long __fdget_raw(unsigned int fd); -extern unsigned long __fdget_pos(unsigned int fd); extern void __f_unlock_pos(struct file *); -static inline struct fd __to_fd(unsigned long v) -{ - return (struct fd){v}; -} - -static inline struct fd fdget(unsigned int fd) -{ - return __to_fd(__fdget(fd)); -} - -static inline struct fd fdget_raw(unsigned int fd) -{ - return __to_fd(__fdget_raw(fd)); -} - -static inline struct fd fdget_pos(int fd) -{ - return __to_fd(__fdget_pos(fd)); -} +struct fd fdget(unsigned int fd); +struct fd fdget_raw(unsigned int fd); +struct fd fdget_pos(unsigned int fd); static inline void fdput_pos(struct fd f) { -- cgit v1.2.3 From b9d104465a6c2f1bb27ae247bd90b4ab079a1699 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 11 Aug 2024 17:37:56 +0300 Subject: scsi: ufs: Prepare to add HCI capabilities sysfs Prepare so we'll be able to read various other HCI registers. While at it, fix the HCPID & HCMID register names to stand for what they really are. Also replace the pm_runtime_{get/put}_sync() calls in auto_hibern8_show to ufshcd_rpm_{get/put}_sync() as any host controller register reads should. Reviewed-by: Keoseong Park Reviewed-by: Bart Van Assche Reviewed-by: Bean Huo Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20240811143757.2538212-2-avri.altman@wdc.com Signed-off-by: Martin K. Petersen --- include/ufs/ufshci.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 38fe97971a65..194e3655902e 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -25,8 +25,9 @@ enum { REG_CONTROLLER_CAPABILITIES = 0x00, REG_MCQCAP = 0x04, REG_UFS_VERSION = 0x08, - REG_CONTROLLER_DEV_ID = 0x10, - REG_CONTROLLER_PROD_ID = 0x14, + REG_EXT_CONTROLLER_CAPABILITIES = 0x0C, + REG_CONTROLLER_PID = 0x10, + REG_CONTROLLER_MID = 0x14, REG_AUTO_HIBERNATE_IDLE_TIMER = 0x18, REG_INTERRUPT_STATUS = 0x20, REG_INTERRUPT_ENABLE = 0x24, -- cgit v1.2.3 From 12dbc67c3b0bcd7414b511fd8a42ba4e388705bc Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Wed, 7 Aug 2024 21:45:28 +0800 Subject: net: stmmac: Move the atds flag to the stmmac_dma_cfg structure ATDS (Alternate Descriptor Size) is a part of the DMA Bus Mode configs (together with PBL, ALL, EME, etc) of the DW GMAC controllers. 
Seeing it's not changed at runtime but is activated as long as the IP-core has it supported (at least due to the Type 2 Full Checksum Offload Engine feature), move the respective parameter from the stmmac_dma_ops::init() callback argument to the stmmac_dma_cfg structure, which already have the rest of the DMA-related configs defined. Besides the being added in the next commit DW GMAC multi-channels support will require to add the stmmac_dma_ops::init_chan() callback and have the ATDS flag set/cleared for each channel in there. Having the atds-flag in the stmmac_dma_cfg structure will make the parameter accessible from stmmac_dma_ops::init_chan() callback too. Signed-off-by: Feiyang Chen Signed-off-by: Yinggang Gu Reviewed-by: Serge Semin Acked-by: Huacai Chen Signed-off-by: Yanteng Si Tested-by: Serge Semin Signed-off-by: Paolo Abeni --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 84e13bd5df28..338991c08f00 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -100,6 +100,7 @@ struct stmmac_dma_cfg { bool eame; bool multi_msi_en; bool dche; + bool atds; }; #define AXI_BLEN 7 -- cgit v1.2.3 From c343e66ed009fdb6206787558130ccbd504ffc12 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Aug 2024 22:52:17 +0200 Subject: usb: gadget: configfs: Make check_user_usb_string() static "linux/usb/gadget_configfs.h" is only included in "drivers/usb/gadget/configfs.c", so there is no need to declare a function in the header file. it is only used in this .c file. It's better to have it static. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/958cb49dca1bff4254a3492c018efbf3b01918b4.1723323107.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget_configfs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h index d61aebd68128..6a552dd4dec9 100644 --- a/include/linux/usb/gadget_configfs.h +++ b/include/linux/usb/gadget_configfs.h @@ -4,9 +4,6 @@ #include -int check_user_usb_string(const char *name, - struct usb_gadget_strings *stringtab_dev); - #define GS_STRINGS_W(__struct, __name) \ static ssize_t __struct##_##__name##_store(struct config_item *item, \ const char *page, size_t len) \ -- cgit v1.2.3 From d1e14e06810a96ef0cdabf572513594031d4dad6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Aug 2024 23:05:46 +0200 Subject: usb: gadget: configfs: Constify struct config_item_type 'struct config_item_type' is not modified in this file. Apparently, these structures are only used with config_group_init_type_name() which takes a const struct config_item_type* as a 3rd argument. Constifying this structure moves some data to a read-only section, so increase overall security, especially when the structure holds some function pointers. 
On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 40834 5112 64 46010 b3ba drivers/usb/gadget/configfs.o After: ===== text data bss dec hex filename 41218 4728 64 46010 b3ba drivers/usb/gadget/configfs.o Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/513223e97082e1bb758e36d55c175ec9ea34a71c.1723323896.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget_configfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h index 6a552dd4dec9..6b5d6838f865 100644 --- a/include/linux/usb/gadget_configfs.h +++ b/include/linux/usb/gadget_configfs.h @@ -34,7 +34,7 @@ static struct configfs_item_operations struct_in##_langid_item_ops = { \ .release = struct_in##_attr_release, \ }; \ \ -static struct config_item_type struct_in##_langid_type = { \ +static const struct config_item_type struct_in##_langid_type = { \ .ct_item_ops = &struct_in##_langid_item_ops, \ .ct_attrs = struct_in##_langid_attrs, \ .ct_owner = THIS_MODULE, \ @@ -91,7 +91,7 @@ static struct configfs_group_operations struct_in##_strings_ops = { \ .drop_item = &struct_in##_strings_drop, \ }; \ \ -static struct config_item_type struct_in##_strings_type = { \ +static const struct config_item_type struct_in##_strings_type = { \ .ct_group_ops = &struct_in##_strings_ops, \ .ct_owner = THIS_MODULE, \ } -- cgit v1.2.3 From c26cee817f8bd9a22bfade20f739ec2fc6f20221 Mon Sep 17 00:00:00 2001 From: David Sands Date: Sat, 10 Aug 2024 20:00:05 -0400 Subject: usb: gadget: f_fs: add capability for dfu functional descriptor Add the ability for the USB FunctionFS (FFS) gadget driver to be able to create Device Firmware Upgrade (DFU) functional descriptors. [1] This patch allows implementation of DFU in userspace using the FFS gadget. The DFU protocol uses the control pipe (ep0) for all messaging so only the addition of the DFU functional descriptor is needed in the kernel driver. The DFU functional descriptor is written to the ep0 file along with any other descriptors during FFS setup. DFU requires an interface descriptor followed by the DFU functional descriptor. This patch includes documentation of the added descriptor for DFU and conversion of some existing documentation to kernel-doc format so that it can be included in the generated docs. An implementation of DFU 1.1 that implements just the runtime descriptor using the FunctionFS gadget (with rebooting into u-boot for DFU mode) has been tested on an i.MX8 Nano. An implementation of DFU 1.1 that implements both runtime and DFU mode using the FunctionFS gadget has been tested on Xilinx Zynq UltraScale+. Note that for the best performance of firmware update file transfers, the userspace program should respond as quick as possible to the setup packets. 
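For illustration only, a hedged sketch of how userspace might lay out a runtime DFU function's full-speed descriptors before writing them to ep0 (the FunctionFS v2 header, descriptor counts and strings are omitted; the attribute, timeout and transfer-size values are arbitrary):

	#include <endian.h>
	#include <linux/usb/ch9.h>
	#include <linux/usb/functionfs.h>

	static const struct {
		struct usb_interface_descriptor intf;
		struct usb_dfu_functional_descriptor dfu;
	} __attribute__((packed)) fs_descs = {
		.intf = {
			.bLength		= sizeof(fs_descs.intf),
			.bDescriptorType	= USB_DT_INTERFACE,
			.bNumEndpoints		= 0,	/* DFU uses only ep0 */
			.bInterfaceClass	= USB_CLASS_APP_SPEC,
			.bInterfaceSubClass	= USB_SUBCLASS_DFU,
			.bInterfaceProtocol	= 1,	/* DFU runtime protocol */
		},
		.dfu = {
			.bLength		= sizeof(fs_descs.dfu),
			.bDescriptorType	= USB_DT_DFU_FUNCTIONAL,
			.bmAttributes		= DFU_FUNC_ATT_CAN_DOWNLOAD |
						  DFU_FUNC_ATT_WILL_DETACH,
			.wDetachTimeOut		= htole16(5000),
			.wTransferSize		= htole16(4096),
			.bcdDFUVersion		= htole16(0x0110),
		},
	};

This blob goes into the usual fs_descrs/hs_descrs areas of the FunctionFS descriptors write; the DFU protocol itself is then handled entirely in userspace over ep0 setup packets.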
[1] https://www.usb.org/sites/default/files/DFU_1.1.pdf Signed-off-by: David Sands Co-developed-by: Chris Wulff Signed-off-by: Chris Wulff Link: https://lore.kernel.org/r/20240811000004.1395888-2-crwulff@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/ch9.h | 8 ++- include/uapi/linux/usb/functionfs.h | 97 ++++++++++++++++++++++++++++++++++--- 2 files changed, 95 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 44d73ba8788d..91f0f7e214a5 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -254,6 +254,9 @@ struct usb_ctrlrequest { #define USB_DT_DEVICE_CAPABILITY 0x10 #define USB_DT_WIRELESS_ENDPOINT_COMP 0x11 #define USB_DT_WIRE_ADAPTER 0x21 +/* From USB Device Firmware Upgrade Specification, Revision 1.1 */ +#define USB_DT_DFU_FUNCTIONAL 0x21 +/* these are from the Wireless USB spec */ #define USB_DT_RPIPE 0x22 #define USB_DT_CS_RADIO_CONTROL 0x23 /* From the T10 UAS specification */ @@ -329,9 +332,10 @@ struct usb_device_descriptor { #define USB_CLASS_USB_TYPE_C_BRIDGE 0x12 #define USB_CLASS_MISC 0xef #define USB_CLASS_APP_SPEC 0xfe -#define USB_CLASS_VENDOR_SPEC 0xff +#define USB_SUBCLASS_DFU 0x01 -#define USB_SUBCLASS_VENDOR_SPEC 0xff +#define USB_CLASS_VENDOR_SPEC 0xff +#define USB_SUBCLASS_VENDOR_SPEC 0xff /*-------------------------------------------------------------------------*/ diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index 9f88de9c3d66..2ebdba111a8f 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -3,6 +3,7 @@ #define _UAPI__LINUX_FUNCTIONFS_H__ +#include #include #include @@ -37,6 +38,31 @@ struct usb_endpoint_descriptor_no_audio { __u8 bInterval; } __attribute__((packed)); +/** + * struct usb_dfu_functional_descriptor - DFU Functional descriptor + * @bLength: Size of the descriptor (bytes) + * @bDescriptorType: USB_DT_DFU_FUNCTIONAL + * @bmAttributes: DFU attributes + * @wDetachTimeOut: Maximum time to wait after DFU_DETACH (ms, le16) + * @wTransferSize: Maximum number of bytes per control-write (le16) + * @bcdDFUVersion: DFU Spec version (BCD, le16) + */ +struct usb_dfu_functional_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bmAttributes; + __le16 wDetachTimeOut; + __le16 wTransferSize; + __le16 bcdDFUVersion; +} __attribute__ ((packed)); + +/* from DFU functional descriptor bmAttributes */ +#define DFU_FUNC_ATT_CAN_DOWNLOAD _BITUL(0) +#define DFU_FUNC_ATT_CAN_UPLOAD _BITUL(1) +#define DFU_FUNC_ATT_MANIFEST_TOLERANT _BITUL(2) +#define DFU_FUNC_ATT_WILL_DETACH _BITUL(3) + + struct usb_functionfs_descs_head_v2 { __le32 magic; __le32 length; @@ -104,23 +130,38 @@ struct usb_ffs_dmabuf_transfer_req { #ifndef __KERNEL__ -/* +/** + * DOC: descriptors + * * Descriptors format: * + * +-----+-----------+--------------+--------------------------------------+ * | off | name | type | description | - * |-----+-----------+--------------+--------------------------------------| + * +-----+-----------+--------------+--------------------------------------+ * | 0 | magic | LE32 | FUNCTIONFS_DESCRIPTORS_MAGIC_V2 | + * +-----+-----------+--------------+--------------------------------------+ * | 4 | length | LE32 | length of the whole data chunk | + * +-----+-----------+--------------+--------------------------------------+ * | 8 | flags | LE32 | combination of functionfs_flags | + * +-----+-----------+--------------+--------------------------------------+ * | 
| eventfd | LE32 | eventfd file descriptor | + * +-----+-----------+--------------+--------------------------------------+ * | | fs_count | LE32 | number of full-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | hs_count | LE32 | number of high-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | ss_count | LE32 | number of super-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | os_count | LE32 | number of MS OS descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | fs_descrs | Descriptor[] | list of full-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | hs_descrs | Descriptor[] | list of high-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | ss_descrs | Descriptor[] | list of super-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | os_descrs | OSDesc[] | list of MS OS descriptors | + * +-----+-----------+--------------+--------------------------------------+ * * Depending on which flags are set, various fields may be missing in the * structure. Any flags that are not recognised cause the whole block to be @@ -128,71 +169,111 @@ struct usb_ffs_dmabuf_transfer_req { * * Legacy descriptors format (deprecated as of 3.14): * + * +-----+-----------+--------------+--------------------------------------+ * | off | name | type | description | - * |-----+-----------+--------------+--------------------------------------| + * +-----+-----------+--------------+--------------------------------------+ * | 0 | magic | LE32 | FUNCTIONFS_DESCRIPTORS_MAGIC | + * +-----+-----------+--------------+--------------------------------------+ * | 4 | length | LE32 | length of the whole data chunk | + * +-----+-----------+--------------+--------------------------------------+ * | 8 | fs_count | LE32 | number of full-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | 12 | hs_count | LE32 | number of high-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | 16 | fs_descrs | Descriptor[] | list of full-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | hs_descrs | Descriptor[] | list of high-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * * All numbers must be in little endian order. 
* * Descriptor[] is an array of valid USB descriptors which have the following * format: * + * +-----+-----------------+------+--------------------------+ * | off | name | type | description | - * |-----+-----------------+------+--------------------------| + * +-----+-----------------+------+--------------------------+ * | 0 | bLength | U8 | length of the descriptor | + * +-----+-----------------+------+--------------------------+ * | 1 | bDescriptorType | U8 | descriptor type | + * +-----+-----------------+------+--------------------------+ * | 2 | payload | | descriptor's payload | + * +-----+-----------------+------+--------------------------+ * * OSDesc[] is an array of valid MS OS Feature Descriptors which have one of * the following formats: * + * +-----+-----------------+------+--------------------------+ * | off | name | type | description | - * |-----+-----------------+------+--------------------------| + * +-----+-----------------+------+--------------------------+ * | 0 | inteface | U8 | related interface number | + * +-----+-----------------+------+--------------------------+ * | 1 | dwLength | U32 | length of the descriptor | + * +-----+-----------------+------+--------------------------+ * | 5 | bcdVersion | U16 | currently supported: 1 | + * +-----+-----------------+------+--------------------------+ * | 7 | wIndex | U16 | currently supported: 4 | + * +-----+-----------------+------+--------------------------+ * | 9 | bCount | U8 | number of ext. compat. | + * +-----+-----------------+------+--------------------------+ * | 10 | Reserved | U8 | 0 | + * +-----+-----------------+------+--------------------------+ * | 11 | ExtCompat[] | | list of ext. compat. d. | + * +-----+-----------------+------+--------------------------+ * + * +-----+-----------------+------+--------------------------+ * | off | name | type | description | - * |-----+-----------------+------+--------------------------| + * +-----+-----------------+------+--------------------------+ * | 0 | inteface | U8 | related interface number | + * +-----+-----------------+------+--------------------------+ * | 1 | dwLength | U32 | length of the descriptor | + * +-----+-----------------+------+--------------------------+ * | 5 | bcdVersion | U16 | currently supported: 1 | + * +-----+-----------------+------+--------------------------+ * | 7 | wIndex | U16 | currently supported: 5 | + * +-----+-----------------+------+--------------------------+ * | 9 | wCount | U16 | number of ext. compat. | + * +-----+-----------------+------+--------------------------+ * | 11 | ExtProp[] | | list of ext. prop. d. 
| + * +-----+-----------------+------+--------------------------+ * * ExtCompat[] is an array of valid Extended Compatiblity descriptors * which have the following format: * + * +-----+-----------------------+------+-------------------------------------+ * | off | name | type | description | - * |-----+-----------------------+------+-------------------------------------| + * +-----+-----------------------+------+-------------------------------------+ * | 0 | bFirstInterfaceNumber | U8 | index of the interface or of the 1st| + * +-----+-----------------------+------+-------------------------------------+ * | | | | interface in an IAD group | + * +-----+-----------------------+------+-------------------------------------+ * | 1 | Reserved | U8 | 1 | + * +-----+-----------------------+------+-------------------------------------+ * | 2 | CompatibleID | U8[8]| compatible ID string | + * +-----+-----------------------+------+-------------------------------------+ * | 10 | SubCompatibleID | U8[8]| subcompatible ID string | + * +-----+-----------------------+------+-------------------------------------+ * | 18 | Reserved | U8[6]| 0 | + * +-----+-----------------------+------+-------------------------------------+ * * ExtProp[] is an array of valid Extended Properties descriptors * which have the following format: * + * +-----+-----------------------+------+-------------------------------------+ * | off | name | type | description | - * |-----+-----------------------+------+-------------------------------------| + * +-----+-----------------------+------+-------------------------------------+ * | 0 | dwSize | U32 | length of the descriptor | + * +-----+-----------------------+------+-------------------------------------+ * | 4 | dwPropertyDataType | U32 | 1..7 | + * +-----+-----------------------+------+-------------------------------------+ * | 8 | wPropertyNameLength | U16 | bPropertyName length (NL) | + * +-----+-----------------------+------+-------------------------------------+ * | 10 | bPropertyName |U8[NL]| name of this property | + * +-----+-----------------------+------+-------------------------------------+ * |10+NL| dwPropertyDataLength | U32 | bPropertyData length (DL) | + * +-----+-----------------------+------+-------------------------------------+ * |14+NL| bProperty |U8[DL]| payload of this property | + * +-----+-----------------------+------+-------------------------------------+ */ struct usb_functionfs_strings_head { -- cgit v1.2.3 From 92567a5f92bc947fb7aa4351979db1b7b71a554c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 8 Aug 2024 22:06:19 +0800 Subject: iommu: Remove unused declaration iommu_sva_unbind_gpasid() Commit 0c9f17877891 ("iommu: Remove guest pasid related interfaces and definitions") removed the implementation but leave declaration. 
Signed-off-by: Yue Haibing Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240808140619.2498535-1-yuehaibing@huawei.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4d47f2c33311..04cbdae0052e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -795,8 +795,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, -- cgit v1.2.3 From 1ef33652d22c69a2a7519d003cd6c79b8e514f44 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Aug 2024 05:25:07 -0700 Subject: net: netpoll: extract core of netpoll_cleanup Extract the core part of netpoll_cleanup(), so, it could be called from a caller that has the rtnl lock already. Netconsole uses this in a weird way right now: __netpoll_cleanup(&nt->np); spin_lock_irqsave(&target_list_lock, flags); netdev_put(nt->np.dev, &nt->np.dev_tracker); nt->np.dev = NULL; nt->enabled = false; This will be replaced by do_netpoll_cleanup() as the locking situation is overhauled. Signed-off-by: Breno Leitao Reviewed-by: Rik van Riel Signed-off-by: Paolo Abeni --- include/linux/netpoll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index bd19c4b91e31..cd4e28db0cbd 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -64,6 +64,7 @@ int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); +void do_netpoll_cleanup(struct netpoll *np); netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); #ifdef CONFIG_NETPOLL -- cgit v1.2.3 From 844efaef48e846b5310b0e7af5e7578762eb7e8d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 22 Apr 2024 17:33:38 +0300 Subject: drm: fixed: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Signed-off-by: Andy Shevchenko Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240422143338.2026791-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jani Nikula --- include/drm/drm_fixed.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index ef8bc8d72039..1922188f00e8 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -25,8 +25,9 @@ #ifndef DRM_FIXED_H #define DRM_FIXED_H -#include #include +#include +#include typedef union dfixed { u32 full; -- cgit v1.2.3 From 58a63729c957621f1990c3494c702711188ca347 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 9 Aug 2024 08:58:58 -0700 Subject: net: mana: Fix doorbell out of order violation and avoid unnecessary doorbell rings After napi_complete_done() is called when NAPI is polling in the current process context, another NAPI may be scheduled and start running in softirq on another CPU and may ring the doorbell before the current CPU does. 
When combined with unnecessary rings when there is no need to arm the CQ, it triggers error paths in the hardware. This patch fixes this by calling napi_complete_done() after doorbell rings. It limits the number of unnecessary rings when there is no need to arm. MANA hardware specifies that there must be one doorbell ring every 8 CQ wraparounds. This driver guarantees one doorbell ring as soon as the number of consumed CQEs exceeds 4 CQ wraparounds. In practical workloads, the 4 CQ wraparounds proves to be big enough that it rarely exceeds this limit before all the napi weight is consumed. To implement this, add a per-CQ counter cq->work_done_since_doorbell, and make sure the CQ is armed as soon as passing 4 wraparounds of the CQ. Cc: stable@vger.kernel.org Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") Reviewed-by: Haiyang Zhang Signed-off-by: Long Li Link: https://patch.msgid.link/1723219138-29887-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Paolo Abeni --- include/net/mana/mana.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 6439fd8b437b..7caa334f4888 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -275,6 +275,7 @@ struct mana_cq { /* NAPI data */ struct napi_struct napi; int work_done; + int work_done_since_doorbell; int budget; }; -- cgit v1.2.3 From 779bac9994452f6a894524f70c00cfb0cd4b6364 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Aug 2024 15:08:04 +0200 Subject: Revert "ACPI: EC: Evaluate orphan _REG under EC device" This reverts commit 0e6b6dedf168 ("Revert "ACPI: EC: Evaluate orphan _REG under EC device") because the problem addressed by it will be addressed differently in what follows. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Cc: All applicable Link: https://patch.msgid.link/3236716.5fSG56mABF@rjwysocki.net --- include/acpi/acpixf.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 80dc36f9d527..94d0fc3bd412 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -662,10 +662,6 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id)) -ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_execute_orphan_reg_method(acpi_handle device, - acpi_adr_space_type - space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, -- cgit v1.2.3 From cdf65d73e001fde600b18d7e45afadf559425ce5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Aug 2024 15:11:42 +0200 Subject: ACPICA: Add a depth argument to acpi_execute_reg_methods() A subsequent change will need to pass a depth argument to acpi_execute_reg_methods(), so prepare that function for it. No intentional functional changes. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Hans de Goede Cc: All applicable Link: https://patch.msgid.link/8451567.NyiUUSuA9g@rjwysocki.net --- include/acpi/acpixf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 94d0fc3bd412..9f1c1d225e32 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -660,6 +660,7 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status void *context)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, + u32 nax_depth, acpi_adr_space_type space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status -- cgit v1.2.3 From dde286ee57704226b500cb9eb59547fec07aad3d Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 8 Aug 2024 15:36:28 +0300 Subject: regmap: Allow setting IRQ domain name suffix When multiple IRQ domains are created from the same device-tree node they will get the same name based on the device-tree path. This will cause a naming collision in debugFS when IRQ domain specific entries are created. The regmap-IRQ creates per instance IRQ domains. This will lead to a domain name conflict when a device which provides more than one interrupt line uses the regmap-IRQ. Add support for specifying an IRQ domain name suffix when creating a regmap-IRQ controller. Signed-off-by: Matti Vaittinen Link: https://patch.msgid.link/776bc4996969e5081bcf61b9bdb5517e537147a3.1723120028.git.mazziesaccount@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 122e38161acb..f9ccad32fc5c 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1521,6 +1521,9 @@ struct regmap_irq_chip_data; * struct regmap_irq_chip - Description of a generic regmap irq_chip. * * @name: Descriptive name for IRQ controller. + * @domain_suffix: Name suffix to be appended to end of IRQ domain name. Needed + * when multiple regmap-IRQ controllers are created from same + * device. * * @main_status: Base main status register address. For chips which have * interrupts arranged in separate sub-irq blocks with own IRQ @@ -1606,6 +1609,7 @@ struct regmap_irq_chip_data; */ struct regmap_irq_chip { const char *name; + const char *domain_suffix; unsigned int main_status; unsigned int num_main_status_bits; -- cgit v1.2.3 From 2a0629834cd82f05d424bbc193374f9a43d1f87d Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 9 Aug 2024 11:16:28 +0800 Subject: vfs: Don't evict inode under the inode lru traversing context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The inode reclaiming process(See function prune_icache_sb) collects all reclaimable inodes and mark them with I_FREEING flag at first, at that time, other processes will be stuck if they try getting these inodes (See function find_inode_fast), then the reclaiming process destroy the inodes by function dispose_list(). Some filesystems(eg. ext4 with ea_inode feature, ubifs with xattr) may do inode lookup in the inode evicting callback function, if the inode lookup is operated under the inode lru traversing context, deadlock problems may happen. Case 1: In function ext4_evict_inode(), the ea inode lookup could happen if ea_inode feature is enabled, the lookup process will be stuck under the evicting context like this: 1. File A has inode i_reg and an ea inode i_ea 2. getfattr(A, xattr_buf) // i_ea is added into lru // lru->i_ea 3. 
Then, following three processes running like this: PA PB echo 2 > /proc/sys/vm/drop_caches shrink_slab prune_dcache_sb // i_reg is added into lru, lru->i_ea->i_reg prune_icache_sb list_lru_walk_one inode_lru_isolate i_ea->i_state |= I_FREEING // set inode state inode_lru_isolate __iget(i_reg) spin_unlock(&i_reg->i_lock) spin_unlock(lru_lock) rm file A i_reg->nlink = 0 iput(i_reg) // i_reg->nlink is 0, do evict ext4_evict_inode ext4_xattr_delete_inode ext4_xattr_inode_dec_ref_all ext4_xattr_inode_iget ext4_iget(i_ea->i_ino) iget_locked find_inode_fast __wait_on_freeing_inode(i_ea) ----→ AA deadlock dispose_list // cannot be executed by prune_icache_sb wake_up_bit(&i_ea->i_state) Case 2: In deleted inode writing function ubifs_jnl_write_inode(), file deleting process holds BASEHD's wbuf->io_mutex while getting the xattr inode, which could race with inode reclaiming process(The reclaiming process could try locking BASEHD's wbuf->io_mutex in inode evicting function), then an ABBA deadlock problem would happen as following: 1. File A has inode ia and a xattr(with inode ixa), regular file B has inode ib and a xattr. 2. getfattr(A, xattr_buf) // ixa is added into lru // lru->ixa 3. Then, following three processes running like this: PA PB PC echo 2 > /proc/sys/vm/drop_caches shrink_slab prune_dcache_sb // ib and ia are added into lru, lru->ixa->ib->ia prune_icache_sb list_lru_walk_one inode_lru_isolate ixa->i_state |= I_FREEING // set inode state inode_lru_isolate __iget(ib) spin_unlock(&ib->i_lock) spin_unlock(lru_lock) rm file B ib->nlink = 0 rm file A iput(ia) ubifs_evict_inode(ia) ubifs_jnl_delete_inode(ia) ubifs_jnl_write_inode(ia) make_reservation(BASEHD) // Lock wbuf->io_mutex ubifs_iget(ixa->i_ino) iget_locked find_inode_fast __wait_on_freeing_inode(ixa) | iput(ib) // ib->nlink is 0, do evict | ubifs_evict_inode | ubifs_jnl_delete_inode(ib) ↓ ubifs_jnl_write_inode ABBA deadlock ←-----make_reservation(BASEHD) dispose_list // cannot be executed by prune_icache_sb wake_up_bit(&ixa->i_state) Fix the possible deadlock by using new inode state flag I_LRU_ISOLATING to pin the inode in memory while inode_lru_isolate() reclaims its pages instead of using ordinary inode reference. This way inode deletion cannot be triggered from inode_lru_isolate() thus avoiding the deadlock. evict() is made to wait for I_LRU_ISOLATING to be cleared before proceeding with inode cleanup. Link: https://lore.kernel.org/all/37c29c42-7685-d1f0-067d-63582ffac405@huaweicloud.com/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=219022 Fixes: e50e5129f384 ("ext4: xattr-in-inode support") Fixes: 7959cf3a7506 ("ubifs: journal: Handle xattrs like files") Cc: stable@vger.kernel.org Signed-off-by: Zhihao Cheng Link: https://lore.kernel.org/r/20240809031628.1069873-1-chengzhihao@huaweicloud.com Reviewed-by: Jan Kara Suggested-by: Jan Kara Suggested-by: Mateusz Guzik Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..fb0426f349fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,6 +2392,9 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. * + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding + * i_count. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? 
*/ #define I_DIRTY_SYNC (1 << 0) @@ -2415,6 +2418,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) #define I_PINNING_NETFS_WB (1 << 18) +#define __I_LRU_ISOLATING 19 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) -- cgit v1.2.3 From bcc954c6caba01fca143162d5fbb90e46aa1ad80 Mon Sep 17 00:00:00 2001 From: Ryo Takakura Date: Mon, 12 Aug 2024 16:27:03 +0900 Subject: printk/panic: Allow cpu backtraces to be written into ringbuffer during panic commit 779dbc2e78d7 ("printk: Avoid non-panic CPUs writing to ringbuffer") disabled non-panic CPUs to further write messages to ringbuffer after panicked. Since the commit, non-panicked CPU's are not allowed to write to ring buffer after panicked and CPU backtrace which is triggered after panicked to sample non-panicked CPUs' backtrace no longer serves its function as it has nothing to print. Fix the issue by allowing non-panicked CPUs to write into ringbuffer while CPU backtrace is in flight. Fixes: 779dbc2e78d7 ("printk: Avoid non-panic CPUs writing to ringbuffer") Signed-off-by: Ryo Takakura Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240812072703.339690-1-takakura@valinux.co.jp Signed-off-by: Petr Mladek --- include/linux/panic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/panic.h b/include/linux/panic.h index 3130e0b5116b..54d90b6c5f47 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -16,6 +16,7 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); +extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; -- cgit v1.2.3 From 6c22aadbf6fd0240181eb4897308153c2aabec2a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 12 Aug 2024 10:28:28 +0200 Subject: drm/fbdev-helper: Remove drm_fb_helper_output_poll_changed() The function is unused. Remove it. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812083000.337744-8-tzimmermann@suse.de --- include/drm/drm_fb_helper.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 375737fd6c36..699f2790b9ac 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -271,9 +271,7 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper); int drm_fb_helper_debug_enter(struct fb_info *info); int drm_fb_helper_debug_leave(struct fb_info *info); - void drm_fb_helper_lastclose(struct drm_device *dev); -void drm_fb_helper_output_poll_changed(struct drm_device *dev); #else static inline void drm_fb_helper_prepare(struct drm_device *dev, struct drm_fb_helper *helper, @@ -401,10 +399,6 @@ static inline int drm_fb_helper_debug_leave(struct fb_info *info) static inline void drm_fb_helper_lastclose(struct drm_device *dev) { } - -static inline void drm_fb_helper_output_poll_changed(struct drm_device *dev) -{ -} #endif #endif -- cgit v1.2.3 From b5757a5be2fac24f5c138e8ddb3b2c7be8ba1cb3 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 12 Aug 2024 10:28:29 +0200 Subject: drm: Remove struct drm_driver.lastclose The lastclose callback in struct drm_driver is unused. Remove it. 
Also update documentation. v2: - update to use drm_lastclose() - fix typo in documentation Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812083000.337744-9-tzimmermann@suse.de --- include/drm/drm_drv.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index cd37936c3926..02ea4e3248fd 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -228,34 +228,6 @@ struct drm_driver { */ void (*postclose) (struct drm_device *, struct drm_file *); - /** - * @lastclose: - * - * Called when the last &struct drm_file has been closed and there's - * currently no userspace client for the &struct drm_device. - * - * Modern drivers should only use this to force-restore the fbdev - * framebuffer using drm_fb_helper_restore_fbdev_mode_unlocked(). - * Anything else would indicate there's something seriously wrong. - * Modern drivers can also use this to execute delayed power switching - * state changes, e.g. in conjunction with the :ref:`vga_switcheroo` - * infrastructure. - * - * This is called after @postclose hook has been called. - * - * NOTE: - * - * All legacy drivers use this callback to de-initialize the hardware. - * This is purely because of the shadow-attach model, where the DRM - * kernel driver does not really own the hardware. Instead ownershipe is - * handled with the help of userspace through an inheritedly racy dance - * to set/unset the VT into raw mode. - * - * Legacy drivers initialize the hardware in the @firstopen callback, - * which isn't even called for modern drivers. - */ - void (*lastclose) (struct drm_device *); - /** * @unload: * -- cgit v1.2.3 From 446d0f4849b101bfc35c0d00835c3e3a4804616d Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 12 Aug 2024 10:28:30 +0200 Subject: drm: Remove struct drm_mode_config_funcs.output_poll_changed The output_poll_changed hook in struct drm_mode_config_funcs is unused. Remove it. The helper drm_client_dev_hotplug() implements the callback's functionality. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812083000.337744-10-tzimmermann@suse.de --- include/drm/drm_mode_config.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index ab0f167474b1..271765e2e9f2 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -97,22 +97,6 @@ struct drm_mode_config_funcs { */ const struct drm_format_info *(*get_format_info)(const struct drm_mode_fb_cmd2 *mode_cmd); - /** - * @output_poll_changed: - * - * Callback used by helpers to inform the driver of output configuration - * changes. - * - * Drivers implementing fbdev emulation use drm_kms_helper_hotplug_event() - * to call this hook to inform the fbdev helper of output changes. - * - * This hook is deprecated, drivers should instead implement fbdev - * support with struct drm_client, which takes care of any necessary - * hotplug event forwarding already without further involvement by - * the driver. 
- */ - void (*output_poll_changed)(struct drm_device *dev); - /** * @mode_valid: * -- cgit v1.2.3 From 5ddb0a8aa8e4754a8fb77e284e0d6f46c2350f88 Mon Sep 17 00:00:00 2001 From: Tejas Vipin Date: Tue, 6 Aug 2024 19:29:48 +0530 Subject: drm/mipi-dsi: add more multi functions for better error handling Add more functions that can benefit from being multi style and mark older variants as deprecated to eventually convert all mipi_dsi functions to multi style. Acked-by: Maxime Ripard Signed-off-by: Tejas Vipin Reviewed-by: Douglas Anderson Acked-by: Jessica Zhang [dianders: Fixed whitespace warning when applying] Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240806135949.468636-2-tejasvipin76@gmail.com --- include/drm/drm_mipi_dsi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 0f520eeeaa8e..b78aae45cae7 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -365,6 +365,16 @@ void mipi_dsi_dcs_set_display_off_multi(struct mipi_dsi_multi_context *ctx); void mipi_dsi_dcs_set_display_on_multi(struct mipi_dsi_multi_context *ctx); void mipi_dsi_dcs_set_tear_on_multi(struct mipi_dsi_multi_context *ctx, enum mipi_dsi_dcs_tear_mode mode); +void mipi_dsi_turn_on_peripheral_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_soft_reset_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_set_display_brightness_multi(struct mipi_dsi_multi_context *ctx, + u16 brightness); +void mipi_dsi_dcs_set_pixel_format_multi(struct mipi_dsi_multi_context *ctx, + u8 format); +void mipi_dsi_dcs_set_column_address_multi(struct mipi_dsi_multi_context *ctx, + u16 start, u16 end); +void mipi_dsi_dcs_set_page_address_multi(struct mipi_dsi_multi_context *ctx, + u16 start, u16 end); /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet -- cgit v1.2.3 From ec0a7d44b358afaaf52856d03c72e20587bc888b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 15:28:25 -0700 Subject: workqueue: Add interface for user-defined workqueue lockdep map Add an interface for a user-defined workqueue lockdep map, which is helpful when multiple workqueues are created for the same purpose. This also helps avoid leaking lockdep maps on each workqueue creation. v2: - Add alloc_workqueue_lockdep_map (Tejun) v3: - Drop __WQ_USER_OWNED_LOCKDEP (Tejun) - static inline alloc_ordered_workqueue_lockdep_map (Tejun) Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Matthew Brost Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4eb8f9563136..8ccbf510880b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -507,6 +507,58 @@ void workqueue_softirq_dead(unsigned int cpu); __printf(1, 4) struct workqueue_struct * alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); +#ifdef CONFIG_LOCKDEP +/** + * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags + * @max_active: max in-flight work items, 0 for default + * @lockdep_map: user-defined lockdep_map + * @...: args for @fmt + * + * Same as alloc_workqueue but with the a user-define lockdep_map. 
Useful for + * workqueues created with the same purpose and to avoid leaking a lockdep_map + * on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +__printf(1, 5) struct workqueue_struct * +alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, + struct lockdep_map *lockdep_map, ...); + +/** + * alloc_ordered_workqueue_lockdep_map - allocate an ordered workqueue with + * user-defined lockdep_map + * + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) + * @lockdep_map: user-defined lockdep_map + * @args: args for @fmt + * + * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. + * Useful for workqueues created with the same purpose and to avoid leaking a + * lockdep_map on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +__printf(1, 4) static inline struct workqueue_struct * +alloc_ordered_workqueue_lockdep_map(const char *fmt, unsigned int flags, + struct lockdep_map *lockdep_map, ...) +{ + struct workqueue_struct *wq; + va_list args; + + va_start(args, lockdep_map); + wq = alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | flags, + 1, lockdep_map, args); + va_end(args); + + return wq; +} +#endif + /** * alloc_ordered_workqueue - allocate an ordered workqueue * @fmt: printf format for the name of the workqueue -- cgit v1.2.3 From fb4dae4ca315fe48f1a8a452172f29196b2a7f3d Mon Sep 17 00:00:00 2001 From: Junfeng Guo Date: Thu, 25 Jul 2024 16:08:05 -0600 Subject: virtchnl: support raw packet in protocol header The patch extends existing virtchnl_proto_hdrs structure to allow VF to pass a pair of buffers as packet data and mask that describe a match pattern of a filter rule. Then the kernel PF driver is requested to parse the pair of buffer and figure out low level hardware metadata (ptype, profile, field vector.. ) to program the expected FDIR or RSS rules. Reviewed-by: Simon Horman Reviewed-by: Marcin Szycik Signed-off-by: Qi Zhang Signed-off-by: Junfeng Guo Signed-off-by: Ahmed Zaki Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 8e177b67e82f..4f78a65e33dc 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -1121,6 +1121,7 @@ enum virtchnl_vfr_states { }; #define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 +#define VIRTCHNL_MAX_SIZE_RAW_PACKET 1024 #define PROTO_HDR_SHIFT 5 #define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) #define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1) @@ -1266,13 +1267,22 @@ struct virtchnl_proto_hdrs { u8 pad[3]; /** * specify where protocol header start from. + * must be 0 when sending a raw packet request. * 0 - from the outer layer * 1 - from the first inner layer * 2 - from the second inner layer * .... 
**/ int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ - struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; + union { + struct virtchnl_proto_hdr + proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; + struct { + u16 pkt_len; + u8 spec[VIRTCHNL_MAX_SIZE_RAW_PACKET]; + u8 mask[VIRTCHNL_MAX_SIZE_RAW_PACKET]; + } raw; + }; }; VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); -- cgit v1.2.3 From 623122ac1c4074791ea319df4676fb2875817fe4 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 25 Jul 2024 16:08:09 -0600 Subject: iavf: add support for offloading tc U32 cls filters Add support for offloading cls U32 filters. Only "skbedit queue_mapping" and "drop" actions are supported. Also, only "ip" and "802_3" tc protocols are allowed. The PF must advertise the VIRTCHNL_VF_OFFLOAD_TC_U32 capability flag. Since the filters will be enabled via the FD stage at the PF, a new type of FDIR filters is added and the existing list and state machine are used. The new filters can be used to configure flow directors based on raw (binary) pattern in the rx packet. Examples: 0. # tc qdisc add dev enp175s0v0 ingress 1. Redirect UDP from src IP 192.168.2.1 to queue 12: # tc filter add dev protocol ip ingress u32 \ match u32 0x45000000 0xff000000 at 0 \ match u32 0x00110000 0x00ff0000 at 8 \ match u32 0xC0A80201 0xffffffff at 12 \ match u32 0x00000000 0x00000000 at 24 \ action skbedit queue_mapping 12 skip_sw 2. Drop all ICMP: # tc filter add dev protocol ip ingress u32 \ match u32 0x45000000 0xff000000 at 0 \ match u32 0x00010000 0x00ff0000 at 8 \ match u32 0x00000000 0x00000000 at 24 \ action drop skip_sw 3. Redirect ICMP traffic from MAC 3c:fd:fe:a5:47:e0 to queue 7 (note proto: 802_3): # tc filter add dev protocol 802_3 ingress u32 \ match u32 0x00003CFD 0x0000ffff at 4 \ match u32 0xFEA547E0 0xffffffff at 8 \ match u32 0x08004500 0xffffff00 at 12 \ match u32 0x00000001 0x000000ff at 20 \ match u32 0x0000 0x0000 at 40 \ action skbedit queue_mapping 7 skip_sw Notes on matches: 1 - All intermediate fields that are needed to parse the correct PTYPE must be provided (in e.g. 3: Ethernet Type 0x0800 in MAC, IP version and IP length: 0x45 and protocol: 0x01 (ICMP)). 2 - The last match must provide an offset that guarantees all required headers are accounted for, even if the last header is not matched. For example, in #2, the last match is 4 bytes at offset 24 starting from IP header, so the total is 14 (MAC) + 24 + 4 = 42, which is the sum of MAC+IP+ICMP headers. Reviewed-by: Sridhar Samudrala Reviewed-by: Marcin Szycik Signed-off-by: Ahmed Zaki Tested-by: Rafal Romanowski Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 4f78a65e33dc..f41395264dca 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -247,6 +247,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) #define VIRTCHNL_VF_OFFLOAD_CRC BIT(10) +#define VIRTCHNL_VF_OFFLOAD_TC_U32 BIT(11) #define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) -- cgit v1.2.3 From 55f325958ccc41eaea43eb4546d4dc77c1b5ef8a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 30 Jul 2024 01:16:04 -0400 Subject: bpf: switch maps to CLASS(fd, ...) 
Calling conventions for __bpf_map_get() would be more convenient if it left fpdut() on failure to callers. Makes for simpler logics in the callers. Among other things, the proof of memory safety no longer has to rely upon file->private_data never being ERR_PTR(...) for bpffs files. Original calling conventions made it impossible for the caller to tell whether __bpf_map_get() has returned ERR_PTR(-EINVAL) because it has found the file not be a bpf map one (in which case it would've done fdput()) or because it found that ERR_PTR(-EINVAL) in file->private_data of a bpf map file (in which case fdput() would _not_ have been done). Signed-off-by: Al Viro Reviewed-by: Christian Brauner Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b9425e410bcb..9f35df07e86d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2241,7 +2241,16 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); -struct bpf_map *__bpf_map_get(struct fd f); + +static inline struct bpf_map *__bpf_map_get(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &bpf_map_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); -- cgit v1.2.3 From 03a979b74dc1ad5aeed8026a84d8771842cb1631 Mon Sep 17 00:00:00 2001 From: Yunke Cao Date: Wed, 14 Aug 2024 11:06:43 +0900 Subject: media: videobuf2-core: attach once if multiple planes share the same dbuf When multiple planes use the same dma buf, each plane will have its own dma buf attachment and mapping. It is a waste of IOVA space. This patch adds a dbuf_duplicated boolean in vb2_plane. If a plane's dbuf is the same as an existing plane, do not create another attachment and mapping. Signed-off-by: Yunke Cao Acked-by: Tomasz Figa Signed-off-by: Hans Verkuil --- include/media/videobuf2-core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 955237ac503d..9b02aeba4108 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -154,6 +154,8 @@ struct vb2_mem_ops { * @mem_priv: private data with this plane. * @dbuf: dma_buf - shared buffer object. * @dbuf_mapped: flag to show whether dbuf is mapped or not + * @dbuf_duplicated: boolean to show whether dbuf is duplicated with a + * previous plane of the buffer. * @bytesused: number of bytes occupied by data in the plane (payload). * @length: size of this plane (NOT the payload) in bytes. The maximum * valid size is MAX_UINT - PAGE_SIZE. @@ -179,6 +181,7 @@ struct vb2_plane { void *mem_priv; struct dma_buf *dbuf; unsigned int dbuf_mapped; + bool dbuf_duplicated; unsigned int bytesused; unsigned int length; unsigned int min_length; -- cgit v1.2.3 From dfa5543193f303a7270ec7c725e656970faf7d57 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 17:28:49 +0300 Subject: drm/edid: make drm_edid_block_valid() static drm_edid_block_valid() is no longer used outside of drm_edid.c. Make it static. 
Acked-by: Zhi Wang Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812142849.1588006-2-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/drm_edid.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 6bdfa254a1c1..eaac5e665892 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -440,8 +440,6 @@ int drm_add_modes_noedid(struct drm_connector *connector, int hdisplay, int vdisplay); int drm_edid_header_is_valid(const void *edid); -bool drm_edid_block_valid(u8 *raw_edid, int block, bool print_bad_edid, - bool *edid_corrupt); bool drm_edid_is_valid(struct edid *edid); void drm_edid_get_monitor_name(const struct edid *edid, char *name, int buflen); -- cgit v1.2.3 From 7945b741d1fc071a621366c512a060ea08848955 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Jul 2024 19:47:47 -0700 Subject: rcu-tasks: Remove RCU Tasks Rude asynchronous APIs The call_rcu_tasks_rude() and rcu_barrier_tasks_rude() APIs are currently unused. This commit therefore removes their definitions and boot-time self-tests. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Signed-off-by: Neeraj Upadhyay --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13f6f00aecf9..31e679c7110e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -37,7 +37,6 @@ /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); -void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); struct rcu_gp_oldstate; @@ -202,7 +201,6 @@ do { \ } while (0) # ifdef CONFIG_TASKS_RUDE_RCU -void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks_rude(void); # endif -- cgit v1.2.3 From fe91cf39db0939ac8d523b5a1c31840f7cbe205c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 1 Aug 2024 17:34:25 -0700 Subject: rcu/tasks: Add detailed grace-period and barrier diagnostics This commit adds rcu_tasks_torture_stats_print(), rcu_tasks_trace_torture_stats_print(), and rcu_tasks_rude_torture_stats_print() functions that provide detailed diagnostics on grace-period, callback, and barrier state. Signed-off-by: "Paul E. 
McKenney" Signed-off-by: Neeraj Upadhyay --- include/linux/rcupdate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 31e679c7110e..17463e95b6ef 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -164,6 +164,7 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); +void rcu_tasks_torture_stats_print(char *tt, char *tf); # else # define rcu_tasks_classic_qs(t, preempt) do { } while (0) # define call_rcu_tasks call_rcu @@ -190,6 +191,7 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); rcu_tasks_trace_qs_blkd(t); \ } \ } while (0) +void rcu_tasks_trace_torture_stats_print(char *tt, char *tf); # else # define rcu_tasks_trace_qs(t) do { } while (0) # endif @@ -202,6 +204,7 @@ do { \ # ifdef CONFIG_TASKS_RUDE_RCU void synchronize_rcu_tasks_rude(void); +void rcu_tasks_rude_torture_stats_print(char *tt, char *tf); # endif #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) -- cgit v1.2.3 From ac79beb913dc40b1a49b628e31afdfeb20194ab6 Mon Sep 17 00:00:00 2001 From: Paul Elder Date: Thu, 8 Aug 2024 22:41:05 +0200 Subject: media: rkisp1: Add support for the companding block Add support to the rkisp1 driver for the companding block that exists on the i.MX8MP version of the ISP. This requires usage of the new extensible parameters format, and showcases how the format allows for extensions without breaking backward compatibility. Signed-off-by: Paul Elder Reviewed-by: Jacopo Mondi Reviewed-by: Paul Elder Signed-off-by: Jacopo Mondi Tested-by: Kieran Bingham Acked-by: Sakari Ailus Signed-off-by: Laurent Pinchart --- include/uapi/linux/rkisp1-config.h | 89 +++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 9767bb19c72d..430daceafac7 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -164,6 +164,11 @@ #define RKISP1_CIF_ISP_DPF_MAX_NLF_COEFFS 17 #define RKISP1_CIF_ISP_DPF_MAX_SPATIAL_COEFFS 6 +/* + * Compand + */ +#define RKISP1_CIF_ISP_COMPAND_NUM_POINTS 64 + /* * Measurement types */ @@ -851,6 +856,39 @@ struct rkisp1_params_cfg { struct rkisp1_cif_isp_isp_other_cfg others; }; +/** + * struct rkisp1_cif_isp_compand_bls_config - Rockchip ISP1 Companding parameters (BLS) + * @r: Fixed subtraction value for Bayer pattern R + * @gr: Fixed subtraction value for Bayer pattern Gr + * @gb: Fixed subtraction value for Bayer pattern Gb + * @b: Fixed subtraction value for Bayer pattern B + * + * The values will be subtracted from the sensor values. Note that unlike the + * dedicated BLS block, the BLS values in the compander are 20-bit unsigned. + */ +struct rkisp1_cif_isp_compand_bls_config { + __u32 r; + __u32 gr; + __u32 gb; + __u32 b; +}; + +/** + * struct rkisp1_cif_isp_compand_curve_config - Rockchip ISP1 Companding + * parameters (expand and compression curves) + * @px: Compand curve x-values. Each value stores the distance from the + * previous x-value, expressed as log2 of the distance on 5 bits. + * @x: Compand curve x-values. The functionality of these parameters are + * unknown due to do a lack of hardware documentation, but these are left + * here for future compatibility purposes. 
+ * @y: Compand curve y-values + */ +struct rkisp1_cif_isp_compand_curve_config { + __u8 px[RKISP1_CIF_ISP_COMPAND_NUM_POINTS]; + __u32 x[RKISP1_CIF_ISP_COMPAND_NUM_POINTS]; + __u32 y[RKISP1_CIF_ISP_COMPAND_NUM_POINTS]; +}; + /*---------- PART2: Measurement Statistics ------------*/ /** @@ -1018,6 +1056,9 @@ struct rkisp1_stat_buffer { * @RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS: Histogram statistics * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS: Auto exposure statistics * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS: Auto-focus statistics + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_BLS: BLS in the compand block + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_EXPAND: Companding expand curve + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_COMPRESS: Companding compress curve */ enum rkisp1_ext_params_block_type { RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS, @@ -1037,6 +1078,9 @@ enum rkisp1_ext_params_block_type { RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS, RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS, RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_BLS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_EXPAND, + RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_COMPRESS, }; #define RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE (1U << 0) @@ -1380,6 +1424,46 @@ struct rkisp1_ext_params_afc_config { struct rkisp1_cif_isp_afc_config config; } __attribute__((aligned(8))); +/** + * struct rkisp1_ext_params_compand_bls_config - RkISP1 extensible params + * Compand BLS config + * + * RkISP1 extensible parameters Companding configuration block (black level + * subtraction). Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_BLS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Companding BLS configuration, see + * :c:type:`rkisp1_cif_isp_compand_bls_config` + */ +struct rkisp1_ext_params_compand_bls_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_compand_bls_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_compand_curve_config - RkISP1 extensible params + * Compand curve config + * + * RkISP1 extensible parameters Companding configuration block (expand and + * compression curves). Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_EXPAND` or + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_COMPRESS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Companding curve configuration, see + * :c:type:`rkisp1_cif_isp_compand_curve_config` + */ +struct rkisp1_ext_params_compand_curve_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_compand_curve_config config; +} __attribute__((aligned(8))); + +/* + * The rkisp1_ext_params_compand_curve_config structure is counted twice as it + * is used for both the COMPAND_EXPAND and COMPAND_COMPRESS block types. 
+ */ #define RKISP1_EXT_PARAMS_MAX_SIZE \ (sizeof(struct rkisp1_ext_params_bls_config) +\ sizeof(struct rkisp1_ext_params_dpcc_config) +\ @@ -1397,7 +1481,10 @@ struct rkisp1_ext_params_afc_config { sizeof(struct rkisp1_ext_params_awb_meas_config) +\ sizeof(struct rkisp1_ext_params_hst_config) +\ sizeof(struct rkisp1_ext_params_aec_config) +\ - sizeof(struct rkisp1_ext_params_afc_config)) + sizeof(struct rkisp1_ext_params_afc_config) +\ + sizeof(struct rkisp1_ext_params_compand_bls_config) +\ + sizeof(struct rkisp1_ext_params_compand_curve_config) +\ + sizeof(struct rkisp1_ext_params_compand_curve_config)) /** * enum rksip1_ext_param_buffer_version - RkISP1 extensible parameters version -- cgit v1.2.3 From 66155de93bcf4f2967e602a4b3bf7ebe58f34b11 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:02:58 -0700 Subject: KVM: x86: Disallow read-only memslots for SEV-ES and SEV-SNP (and TDX) Disallow read-only memslots for SEV-{ES,SNP} VM types, as KVM can't directly emulate instructions for ES/SNP, and instead the guest must explicitly request emulation. Unless the guest explicitly requests emulation without accessing memory, ES/SNP relies on KVM creating an MMIO SPTE, with the subsequent #NPF being reflected into the guest as a #VC. But for read-only memslots, KVM deliberately doesn't create MMIO SPTEs, because except for ES/SNP, doing so requires setting reserved bits in the SPTE, i.e. the SPTE can't be readable while also generating a #VC on writes. Because KVM never creates MMIO SPTEs and jumps directly to emulation, the guest never gets a #VC. And since KVM simply resumes the guest if ES/SNP guests trigger emulation, KVM effectively puts the vCPU into an infinite #NPF loop if the vCPU attempts to write read-only memory. Disallow read-only memory for all VMs with protected state, i.e. for upcoming TDX VMs as well as ES/SNP VMs. For TDX, it's actually possible to support read-only memory, as TDX uses EPT Violation #VE to reflect the fault into the guest, e.g. KVM could configure read-only SPTEs with RX protections and SUPPRESS_VE=0. But there is no strong use case for supporting read-only memslots on TDX, e.g. the main historical usage is to emulate option ROMs, but TDX disallows executing from shared memory. And if someone comes along with a legitimate, strong use case, the restriction can always be lifted for TDX. Don't bother trying to retroactively apply the restriction to SEV-ES VMs that are created as type KVM_X86_DEFAULT_VM. Read-only memslots can't possibly work for SEV-ES, i.e. disallowing such memslots is really just means reporting an error to userspace instead of silently hanging vCPUs. Trying to deal with the ordering between KVM_SEV_INIT and memslot creation isn't worth the marginal benefit it would provide userspace. 
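For context, the generic memslot code that consumes the new hook lives in virt/kvm/kvm_main.c and is therefore outside the 'include'-limited diff below. The following is only a simplified sketch of that consumer, written against the declaration added here; the exact function name and the handling of other flags (e.g. guest_memfd) are assumptions, not the literal hunk:

/*
 * Sketch (assumption): flag validation at memslot creation accepts
 * KVM_MEM_READONLY only when the architecture reports that read-only
 * memslots actually work for this VM. Other flag handling is omitted
 * for brevity.
 */
static int check_memory_region_flags(struct kvm *kvm,
				     const struct kvm_userspace_memory_region2 *mem)
{
	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;

	if (kvm_arch_has_readonly_mem(kvm))
		valid_flags |= KVM_MEM_READONLY;

	if (mem->flags & ~valid_flags)
		return -EINVAL;

	return 0;
}

An architecture such as x86 would then override kvm_arch_has_readonly_mem() to return false for VM types with protected guest state, while every other configuration keeps the CONFIG_HAVE_KVM_READONLY_MEM default added in the hunk below.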
Fixes: 26c44aa9e076 ("KVM: SEV: define VM types for SEV and SEV-ES") Fixes: 1dfe571c12cf ("KVM: SEV: Add initial SEV-SNP support") Cc: Peter Gonda Cc: Michael Roth Cc: Vishal Annapurve Cc: Ackerly Tng Signed-off-by: Sean Christopherson Message-ID: <20240809190319.1710470-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 79a6b1a63027..b23c6d48392f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,13 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) } #endif +#ifndef kvm_arch_has_readonly_mem +static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) +{ + return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM); +} +#endif + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; -- cgit v1.2.3 From 44f65d900698278a8451988abe0d5ca37fd46882 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Tue, 6 Aug 2024 21:49:27 +0000 Subject: binfmt_elf: mseal address zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In load_elf_binary as part of the execve(), when the current task’s personality has MMAP_PAGE_ZERO set, the kernel allocates one page at address 0. According to the comment: /* Why this, you ask??? Well SVr4 maps page 0 as read-only, and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ At one point, Linus suggested removing this [1]. Code search in debian didn't see much use of MMAP_PAGE_ZERO [2], it exists in util and test (rr). Sealing this is probably safe, the comment doesn't say the app ever wanting to change the mapping to rwx. Sealing also ensures that never happens. If there is a complaint, we can make this configurable. Link: https://lore.kernel.org/lkml/CAHk-=whVa=nm_GW=NVfPHqcxDbWt4JjjK1YWb0cLjO4ZSGyiDA@mail.gmail.com/ [1] Link: https://codesearch.debian.net/search?q=MMAP_PAGE_ZERO&literal=1&perpkg=1&page=1 [2] Signed-off-by: Jeff Xu Link: https://lore.kernel.org/r/20240806214931.2198172-2-jeffxu@google.com Signed-off-by: Kees Cook --- include/linux/mm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..a178c15812eb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4201,4 +4201,14 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma); int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); +#ifdef CONFIG_64BIT +int do_mseal(unsigned long start, size_t len_in, unsigned long flags); +#else +static inline int do_mseal(unsigned long start, size_t len_in, unsigned long flags) +{ + /* noop on 32 bit */ + return 0; +} +#endif + #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 1c0e5881691a787a9399a99bff4d56ead6e75e91 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Wed, 14 Aug 2024 10:31:13 +0200 Subject: KVM: SEV: uapi: fix typo in SEV_RET_INVALID_CONFIG "INVALID" is misspelt in "SEV_RET_INAVLID_CONFIG". Since this is part of the UAPI, keep the current definition and add a new one with the fix. 
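Since the corrected name is defined as an alias of the misspelled enumerator, both spellings compile to the same value and existing users are unaffected. A minimal, hypothetical userspace check of that property against the installed UAPI header would be:

#include <linux/psp-sev.h>

/* The corrected spelling aliases the original enumerator, so old code
 * keeps building while new code can use SEV_RET_INVALID_CONFIG. */
_Static_assert(SEV_RET_INVALID_CONFIG == SEV_RET_INAVLID_CONFIG,
	       "corrected spelling must alias the original UAPI value");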
Fix-suggested-by: Marc Zyngier Signed-off-by: Amit Shah Message-ID: <20240814083113.21622-1-amit@kernel.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/psp-sev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 2289b7c76c59..832c15d9155b 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -51,6 +51,7 @@ typedef enum { SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, SEV_RET_INAVLID_CONFIG, + SEV_RET_INVALID_CONFIG = SEV_RET_INAVLID_CONFIG, SEV_RET_INVALID_LEN, SEV_RET_ALREADY_OWNED, SEV_RET_INVALID_CERTIFICATE, -- cgit v1.2.3 From 71833e79a42178d8a50b5081c98c78ace9325628 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 14 Aug 2024 13:16:49 +0100 Subject: i2c: Use IS_REACHABLE() for substituting empty ACPI functions Replace IS_ENABLED() with IS_REACHABLE() to substitute empty stubs for: i2c_acpi_get_i2c_resource() i2c_acpi_client_count() i2c_acpi_find_bus_speed() i2c_acpi_new_device_by_fwnode() i2c_adapter *i2c_acpi_find_adapter_by_handle() i2c_acpi_waive_d0_probe() commit f17c06c6608a ("i2c: Fix conditional for substituting empty ACPI functions") partially fixed this conditional to depend on CONFIG_I2C, but used IS_ENABLED(), which is wrong since CONFIG_I2C is tristate. CONFIG_ACPI is boolean but let's also change it to use IS_REACHABLE() to future-proof it against becoming tristate. Somehow despite testing various combinations of CONFIG_I2C and CONFIG_ACPI we missed the combination CONFIG_I2C=m, CONFIG_ACPI=y. Signed-off-by: Richard Fitzgerald Fixes: f17c06c6608a ("i2c: Fix conditional for substituting empty ACPI functions") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408141333.gYnaitcV-lkp@intel.com/ Reviewed-by: Takashi Iwai Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7eedd0c662da..377def497298 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) +#if IS_REACHABLE(CONFIG_ACPI) && IS_REACHABLE(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); -- cgit v1.2.3 From ac9d45544cd571decca395715d0b0a3b617d02f4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 1 Jul 2024 13:33:58 -0700 Subject: locking/csd_lock: Provide an indication of ongoing CSD-lock stall If a CSD-lock stall goes on long enough, it will cause an RCU CPU stall warning. This additional warning provides much additional console-log traffic and little additional information. Therefore, provide a new csd_lock_is_stuck() function that returns true if there is an ongoing CSD-lock stall. This function will be used by the RCU CPU stall warnings to provide a one-line indication of the stall when this function returns true. [ neeraj.upadhyay: Apply Rik van Riel feedback. ] [ neeraj.upadhyay: Apply kernel test robot feedback. ] Signed-off-by: Paul E. 
McKenney Cc: Imran Khan Cc: Ingo Molnar Cc: Leonardo Bras Cc: "Peter Zijlstra (Intel)" Cc: Rik van Riel Signed-off-by: Neeraj Upadhyay --- include/linux/smp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index fcd61dfe2af3..3871bd32018f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -294,4 +294,10 @@ int smpcfd_prepare_cpu(unsigned int cpu); int smpcfd_dead_cpu(unsigned int cpu); int smpcfd_dying_cpu(unsigned int cpu); +#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG +bool csd_lock_is_stuck(void); +#else +static inline bool csd_lock_is_stuck(void) { return false; } +#endif + #endif /* __LINUX_SMP_H */ -- cgit v1.2.3 From 5115bcaf68d87e7a25fd238332b402a24bc534d0 Mon Sep 17 00:00:00 2001 From: Ajit Pandey Date: Tue, 11 Jun 2024 19:07:46 +0530 Subject: dt-bindings: clock: qcom: add DISPCC clocks on SM4450 Add device tree bindings for the display clock controller on Qualcomm SM4450 platform. Signed-off-by: Ajit Pandey Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240611133752.2192401-3-quic_ajipan@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm4450-dispcc.h | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm4450-dispcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm4450-dispcc.h b/include/dt-bindings/clock/qcom,sm4450-dispcc.h new file mode 100644 index 000000000000..ca6f2ef90157 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm4450-dispcc.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISP_CC_SM4450_H +#define _DT_BINDINGS_CLK_QCOM_DISP_CC_SM4450_H + +/* DISP_CC clocks */ +#define DISP_CC_MDSS_AHB1_CLK 0 +#define DISP_CC_MDSS_AHB_CLK 1 +#define DISP_CC_MDSS_AHB_CLK_SRC 2 +#define DISP_CC_MDSS_BYTE0_CLK 3 +#define DISP_CC_MDSS_BYTE0_CLK_SRC 4 +#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 5 +#define DISP_CC_MDSS_BYTE0_INTF_CLK 6 +#define DISP_CC_MDSS_ESC0_CLK 7 +#define DISP_CC_MDSS_ESC0_CLK_SRC 8 +#define DISP_CC_MDSS_MDP1_CLK 9 +#define DISP_CC_MDSS_MDP_CLK 10 +#define DISP_CC_MDSS_MDP_CLK_SRC 11 +#define DISP_CC_MDSS_MDP_LUT1_CLK 12 +#define DISP_CC_MDSS_MDP_LUT_CLK 13 +#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 14 +#define DISP_CC_MDSS_PCLK0_CLK 15 +#define DISP_CC_MDSS_PCLK0_CLK_SRC 16 +#define DISP_CC_MDSS_ROT1_CLK 17 +#define DISP_CC_MDSS_ROT_CLK 18 +#define DISP_CC_MDSS_ROT_CLK_SRC 19 +#define DISP_CC_MDSS_RSCC_AHB_CLK 20 +#define DISP_CC_MDSS_RSCC_VSYNC_CLK 21 +#define DISP_CC_MDSS_VSYNC1_CLK 22 +#define DISP_CC_MDSS_VSYNC_CLK 23 +#define DISP_CC_MDSS_VSYNC_CLK_SRC 24 +#define DISP_CC_PLL0 25 +#define DISP_CC_PLL1 26 +#define DISP_CC_SLEEP_CLK 27 +#define DISP_CC_SLEEP_CLK_SRC 28 +#define DISP_CC_XO_CLK 29 +#define DISP_CC_XO_CLK_SRC 30 + +/* DISP_CC power domains */ +#define DISP_CC_MDSS_CORE_GDSC 0 +#define DISP_CC_MDSS_CORE_INT2_GDSC 1 + +/* DISP_CC resets */ +#define DISP_CC_MDSS_CORE_BCR 0 +#define DISP_CC_MDSS_CORE_INT2_BCR 1 +#define DISP_CC_MDSS_RSCC_BCR 2 + +#endif -- cgit v1.2.3 From 9bf45e4f317f75db234ef6af1f316cf4676e8863 Mon Sep 17 00:00:00 2001 From: Ajit Pandey Date: Tue, 11 Jun 2024 19:07:48 +0530 Subject: dt-bindings: clock: qcom: add CAMCC clocks on SM4450 Add device tree bindings for the camera clock controller on Qualcomm SM4450 platform. 
Signed-off-by: Ajit Pandey Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240611133752.2192401-5-quic_ajipan@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm4450-camcc.h | 106 ++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm4450-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm4450-camcc.h b/include/dt-bindings/clock/qcom,sm4450-camcc.h new file mode 100644 index 000000000000..bf077951bf1c --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm4450-camcc.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_SM4450_H +#define _DT_BINDINGS_CLK_QCOM_CAM_CC_SM4450_H + +/* CAM_CC clocks */ +#define CAM_CC_BPS_AHB_CLK 0 +#define CAM_CC_BPS_AREG_CLK 1 +#define CAM_CC_BPS_CLK 2 +#define CAM_CC_BPS_CLK_SRC 3 +#define CAM_CC_CAMNOC_ATB_CLK 4 +#define CAM_CC_CAMNOC_AXI_CLK 5 +#define CAM_CC_CAMNOC_AXI_CLK_SRC 6 +#define CAM_CC_CAMNOC_AXI_HF_CLK 7 +#define CAM_CC_CAMNOC_AXI_SF_CLK 8 +#define CAM_CC_CCI_0_CLK 9 +#define CAM_CC_CCI_0_CLK_SRC 10 +#define CAM_CC_CCI_1_CLK 11 +#define CAM_CC_CCI_1_CLK_SRC 12 +#define CAM_CC_CORE_AHB_CLK 13 +#define CAM_CC_CPAS_AHB_CLK 14 +#define CAM_CC_CPHY_RX_CLK_SRC 15 +#define CAM_CC_CRE_AHB_CLK 16 +#define CAM_CC_CRE_CLK 17 +#define CAM_CC_CRE_CLK_SRC 18 +#define CAM_CC_CSI0PHYTIMER_CLK 19 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 20 +#define CAM_CC_CSI1PHYTIMER_CLK 21 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 22 +#define CAM_CC_CSI2PHYTIMER_CLK 23 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 24 +#define CAM_CC_CSIPHY0_CLK 25 +#define CAM_CC_CSIPHY1_CLK 26 +#define CAM_CC_CSIPHY2_CLK 27 +#define CAM_CC_FAST_AHB_CLK_SRC 28 +#define CAM_CC_ICP_ATB_CLK 29 +#define CAM_CC_ICP_CLK 30 +#define CAM_CC_ICP_CLK_SRC 31 +#define CAM_CC_ICP_CTI_CLK 32 +#define CAM_CC_ICP_TS_CLK 33 +#define CAM_CC_MCLK0_CLK 34 +#define CAM_CC_MCLK0_CLK_SRC 35 +#define CAM_CC_MCLK1_CLK 36 +#define CAM_CC_MCLK1_CLK_SRC 37 +#define CAM_CC_MCLK2_CLK 38 +#define CAM_CC_MCLK2_CLK_SRC 39 +#define CAM_CC_MCLK3_CLK 40 +#define CAM_CC_MCLK3_CLK_SRC 41 +#define CAM_CC_OPE_0_AHB_CLK 42 +#define CAM_CC_OPE_0_AREG_CLK 43 +#define CAM_CC_OPE_0_CLK 44 +#define CAM_CC_OPE_0_CLK_SRC 45 +#define CAM_CC_PLL0 46 +#define CAM_CC_PLL0_OUT_EVEN 47 +#define CAM_CC_PLL0_OUT_ODD 48 +#define CAM_CC_PLL1 49 +#define CAM_CC_PLL1_OUT_EVEN 50 +#define CAM_CC_PLL2 51 +#define CAM_CC_PLL2_OUT_EVEN 52 +#define CAM_CC_PLL3 53 +#define CAM_CC_PLL3_OUT_EVEN 54 +#define CAM_CC_PLL4 55 +#define CAM_CC_PLL4_OUT_EVEN 56 +#define CAM_CC_SLOW_AHB_CLK_SRC 57 +#define CAM_CC_SOC_AHB_CLK 58 +#define CAM_CC_SYS_TMR_CLK 59 +#define CAM_CC_TFE_0_AHB_CLK 60 +#define CAM_CC_TFE_0_CLK 61 +#define CAM_CC_TFE_0_CLK_SRC 62 +#define CAM_CC_TFE_0_CPHY_RX_CLK 63 +#define CAM_CC_TFE_0_CSID_CLK 64 +#define CAM_CC_TFE_0_CSID_CLK_SRC 65 +#define CAM_CC_TFE_1_AHB_CLK 66 +#define CAM_CC_TFE_1_CLK 67 +#define CAM_CC_TFE_1_CLK_SRC 68 +#define CAM_CC_TFE_1_CPHY_RX_CLK 69 +#define CAM_CC_TFE_1_CSID_CLK 70 +#define CAM_CC_TFE_1_CSID_CLK_SRC 71 + +/* CAM_CC power domains */ +#define CAM_CC_CAMSS_TOP_GDSC 0 + +/* CAM_CC resets */ +#define CAM_CC_BPS_BCR 0 +#define CAM_CC_CAMNOC_BCR 1 +#define CAM_CC_CAMSS_TOP_BCR 2 +#define CAM_CC_CCI_0_BCR 3 +#define CAM_CC_CCI_1_BCR 4 +#define CAM_CC_CPAS_BCR 5 +#define CAM_CC_CRE_BCR 6 +#define CAM_CC_CSI0PHY_BCR 7 +#define CAM_CC_CSI1PHY_BCR 8 +#define 
CAM_CC_CSI2PHY_BCR 9 +#define CAM_CC_ICP_BCR 10 +#define CAM_CC_MCLK0_BCR 11 +#define CAM_CC_MCLK1_BCR 12 +#define CAM_CC_MCLK2_BCR 13 +#define CAM_CC_MCLK3_BCR 14 +#define CAM_CC_OPE_0_BCR 15 +#define CAM_CC_TFE_0_BCR 16 +#define CAM_CC_TFE_1_BCR 17 + +#endif -- cgit v1.2.3 From 47bad234eed970d0d1b03204aab1b3644709ee0b Mon Sep 17 00:00:00 2001 From: Ajit Pandey Date: Tue, 11 Jun 2024 19:07:50 +0530 Subject: dt-bindings: clock: qcom: add GPUCC clocks on SM4450 Add device tree bindings for the graphics clock controller on Qualcomm SM4450 platform. Signed-off-by: Ajit Pandey Reviewed-by: Krzysztof Kozlowski Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240611133752.2192401-7-quic_ajipan@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm4450-gpucc.h | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm4450-gpucc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm4450-gpucc.h b/include/dt-bindings/clock/qcom,sm4450-gpucc.h new file mode 100644 index 000000000000..304f83e5f645 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm4450-gpucc.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SM4450_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SM4450_H + +/* GPU_CC clocks */ +#define GPU_CC_AHB_CLK 0 +#define GPU_CC_CB_CLK 1 +#define GPU_CC_CRC_AHB_CLK 2 +#define GPU_CC_CX_FF_CLK 3 +#define GPU_CC_CX_GFX3D_CLK 4 +#define GPU_CC_CX_GFX3D_SLV_CLK 5 +#define GPU_CC_CX_GMU_CLK 6 +#define GPU_CC_CX_SNOC_DVM_CLK 7 +#define GPU_CC_CXO_AON_CLK 8 +#define GPU_CC_CXO_CLK 9 +#define GPU_CC_DEMET_CLK 10 +#define GPU_CC_DEMET_DIV_CLK_SRC 11 +#define GPU_CC_FF_CLK_SRC 12 +#define GPU_CC_FREQ_MEASURE_CLK 13 +#define GPU_CC_GMU_CLK_SRC 14 +#define GPU_CC_GX_CXO_CLK 15 +#define GPU_CC_GX_FF_CLK 16 +#define GPU_CC_GX_GFX3D_CLK 17 +#define GPU_CC_GX_GFX3D_CLK_SRC 18 +#define GPU_CC_GX_GFX3D_RDVM_CLK 19 +#define GPU_CC_GX_GMU_CLK 20 +#define GPU_CC_GX_VSENSE_CLK 21 +#define GPU_CC_HUB_AHB_DIV_CLK_SRC 22 +#define GPU_CC_HUB_AON_CLK 23 +#define GPU_CC_HUB_CLK_SRC 24 +#define GPU_CC_HUB_CX_INT_CLK 25 +#define GPU_CC_HUB_CX_INT_DIV_CLK_SRC 26 +#define GPU_CC_MEMNOC_GFX_CLK 27 +#define GPU_CC_MND1X_0_GFX3D_CLK 28 +#define GPU_CC_PLL0 29 +#define GPU_CC_PLL1 30 +#define GPU_CC_SLEEP_CLK 31 +#define GPU_CC_XO_CLK_SRC 32 +#define GPU_CC_XO_DIV_CLK_SRC 33 + +/* GPU_CC power domains */ +#define GPU_CC_CX_GDSC 0 +#define GPU_CC_GX_GDSC 1 + +/* GPU_CC resets */ +#define GPU_CC_ACD_BCR 0 +#define GPU_CC_CB_BCR 1 +#define GPU_CC_CX_BCR 2 +#define GPU_CC_FAST_HUB_BCR 3 +#define GPU_CC_FF_BCR 4 +#define GPU_CC_GFX3D_AON_BCR 5 +#define GPU_CC_GMU_BCR 6 +#define GPU_CC_GX_BCR 7 +#define GPU_CC_XO_BCR 8 +#define GPU_CC_GX_ACD_IROOT_BCR 9 +#define GPU_CC_RBCPR_BCR 10 + +#endif -- cgit v1.2.3 From 89817522b1a58970ac1247f65af009b046344fb0 Mon Sep 17 00:00:00 2001 From: Tengfei Fan Date: Mon, 5 Aug 2024 19:08:03 +0200 Subject: dt-bindings: mailbox: qcom-ipcc: Add GPDSP0 and GPDSP1 clients Add GPDSP0 and GPDSP1 clients for SA8775p platform. 
Signed-off-by: Tengfei Fan Acked-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240805-topic-sa8775p-iot-remoteproc-v4-2-86affdc72c04@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/mailbox/qcom-ipcc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/mailbox/qcom-ipcc.h b/include/dt-bindings/mailbox/qcom-ipcc.h index fbfa3febc66d..fd85a79381b3 100644 --- a/include/dt-bindings/mailbox/qcom-ipcc.h +++ b/include/dt-bindings/mailbox/qcom-ipcc.h @@ -33,5 +33,7 @@ #define IPCC_CLIENT_NSP1 18 #define IPCC_CLIENT_TME 23 #define IPCC_CLIENT_WPSS 24 +#define IPCC_CLIENT_GPDSP0 31 +#define IPCC_CLIENT_GPDSP1 32 #endif -- cgit v1.2.3 From 924fc22c282edbf93869b150d9e1b47e0b10485e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 09:44:57 +0200 Subject: firmware: qcom: qseecom: remove unused functions qseecom_scm_dev(), qseecom_dma_alloc() and qseecom_dma_free() are no longer used following the conversion to using tzmem. Remove them. Fixes: 6612103ec35a ("firmware: qcom: qseecom: convert to using the TZ allocator") Reviewed-by: Andrew Halaney Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240731-tzmem-efivars-fix-v2-2-f0e84071ec07@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_qseecom.h | 45 ------------------------------ 1 file changed, 45 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h index 1dc5b3b50aa9..3387897bf368 100644 --- a/include/linux/firmware/qcom/qcom_qseecom.h +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -25,51 +25,6 @@ struct qseecom_client { u32 app_id; }; -/** - * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client. - * @client: The QSEECOM client device. - * - * Returns the SCM device under which the provided QSEECOM client device - * operates. This function is intended to be used for DMA allocations. - */ -static inline struct device *qseecom_scm_dev(struct qseecom_client *client) -{ - return client->aux_dev.dev.parent->parent; -} - -/** - * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client. - * @client: The QSEECOM client to allocate the memory for. - * @size: The number of bytes to allocate. - * @dma_handle: Pointer to where the DMA address should be stored. - * @gfp: Allocation flags. - * - * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for - * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details. - */ -static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp); -} - -/** - * dma_free_coherent() - Free QSEECOM DMA memory. - * @client: The QSEECOM client for which the memory has been allocated. - * @size: The number of bytes allocated. - * @cpu_addr: Virtual memory address to free. - * @dma_handle: DMA memory address to free. - * - * Wrapper function for dma_free_coherent(), freeing memory previously - * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for - * details. - */ -static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle); -} - /** * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. 
* @client: The QSEECOM client associated with the target app. -- cgit v1.2.3 From 6b34e75c48bb913f3431e66353cad9782e7225c7 Mon Sep 17 00:00:00 2001 From: Jingyi Wang Date: Wed, 14 Aug 2024 15:28:05 +0800 Subject: dt-bindings: arm: qcom,ids: add SoC ID for QCS8275/QCS8300 Add the ID for Qualcomm QCS8275/QCS8300 SoC. Signed-off-by: Jingyi Wang Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240814072806.4107079-3-quic_jingyw@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index d6c9e9472121..a213f6244c5d 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -274,6 +274,8 @@ #define QCOM_ID_QCM8550 604 #define QCOM_ID_IPQ5300 624 #define QCOM_ID_IPQ5321 650 +#define QCOM_ID_QCS8300 674 +#define QCOM_ID_QCS8275 675 /* * The board type and revision information, used by Qualcomm bootloaders and -- cgit v1.2.3 From 216203bdc2280d8fc5baf60707eee2051de1426e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Aug 2024 16:15:02 -0600 Subject: UAPI: net/sched: Use __struct_group() in flex struct tc_u32_sel Use the `__struct_group()` helper to create a new tagged `struct tc_u32_sel_hdr`. This structure groups together all the members of the flexible `struct tc_u32_sel` except the flexible array. As a result, the array is effectively separated from the rest of the members without modifying the memory layout of the flexible structure. This new tagged struct will be used to fix problematic declarations of middle-flex-arrays in composite structs[1]. [1] https://git.kernel.org/linus/d88cabfd9abc Signed-off-by: Gustavo A. R. Silva Link: https://patch.msgid.link/e59fe833564ddc5b2cc83056a4c504be887d6193.1723586870.git.gustavoars@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index d36d9cdf0c00..2c32080416b5 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -246,16 +246,19 @@ struct tc_u32_key { }; struct tc_u32_sel { - unsigned char flags; - unsigned char offshift; - unsigned char nkeys; - - __be16 offmask; - __u16 off; - short offoff; - - short hoff; - __be32 hmask; + /* New members MUST be added within the __struct_group() macro below. */ + __struct_group(tc_u32_sel_hdr, hdr, /* no attrs */, + unsigned char flags; + unsigned char offshift; + unsigned char nkeys; + + __be16 offmask; + __u16 off; + short offoff; + + short hoff; + __be32 hmask; + ); struct tc_u32_key keys[]; }; -- cgit v1.2.3 From 69139d2919dd4aa9a553c8245e7c63e82613e3fc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 11 Aug 2024 19:21:53 -0700 Subject: vsock: fix recursive ->recvmsg calls After a vsock socket has been added to a BPF sockmap, its prot->recvmsg has been replaced with vsock_bpf_recvmsg(). Thus the following recursiion could happen: vsock_bpf_recvmsg() -> __vsock_recvmsg() -> vsock_connectible_recvmsg() -> prot->recvmsg() -> vsock_bpf_recvmsg() again We need to fix it by calling the original ->recvmsg() without any BPF sockmap logic in __vsock_recvmsg(). 
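The header hunk below only exports the new double-underscore variants; the matching net/vmw_vsock changes are outside the 'include'-limited diff, so the following is just a sketch of the intended call pattern (the socket-type check and error handling are assumptions):

/*
 * Sketch (assumption about the vsock_bpf.c side): the sockmap fallback
 * calls the double-underscore helpers directly, so it can never re-enter
 * prot->recvmsg(), i.e. vsock_bpf_recvmsg() itself.
 */
static int __vsock_recvmsg(struct socket *sock, struct msghdr *msg,
			   size_t len, int flags)
{
	if (sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET)
		return __vsock_connectible_recvmsg(sock, msg, len, flags);

	return __vsock_dgram_recvmsg(sock, msg, len, flags);
}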
Fixes: 634f1a7110b4 ("vsock: support sockmap") Reported-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com Tested-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com Cc: Bobby Eshleman Cc: Michael S. Tsirkin Cc: Stefano Garzarella Signed-off-by: Cong Wang Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20240812022153.86512-1-xiyou.wangcong@gmail.com Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 535701efc1e5..24d970f7a4fa 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -230,8 +230,12 @@ struct vsock_tap { int vsock_add_tap(struct vsock_tap *vt); int vsock_remove_tap(struct vsock_tap *vt); void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); +int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags); int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -- cgit v1.2.3 From 9bb5e74b2bf88fbb024bb15ded3b011e02c673be Mon Sep 17 00:00:00 2001 From: Griffin Kroah-Hartman Date: Thu, 15 Aug 2024 11:49:20 +0200 Subject: Revert "misc: fastrpc: Restrict untrusted app to attach to privileged PD" This reverts commit bab2f5e8fd5d2f759db26b78d9db57412888f187. Joel reported that this commit breaks userspace and stops sensors in SDM845 from working. Also breaks other qcom SoC devices running postmarketOS. Cc: stable Cc: Ekansh Gupta Cc: Dmitry Baryshkov Reported-by: Joel Selvaraj Link: https://lore.kernel.org/r/9a9f5646-a554-4b65-8122-d212bb665c81@umsystem.edu Signed-off-by: Griffin Kroah-Hartman Acked-by: Srinivas Kandagatla Fixes: bab2f5e8fd5d ("misc: fastrpc: Restrict untrusted app to attach to privileged PD") Link: https://lore.kernel.org/r/20240815094920.8242-1-griffin@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 91583690bddc..f33d914d8f46 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,14 +8,11 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) -- cgit v1.2.3 From aaf8c0b9ae042494cb4585883b15c1332de77840 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Jun 2024 09:47:27 +0800 Subject: f2fs: reduce expensive checkpoint trigger frequency We may trigger high frequent 
checkpoint for below case: 1. mkdir /mnt/dir1; set dir1 encrypted 2. touch /mnt/file1; fsync /mnt/file1 3. mkdir /mnt/dir2; set dir2 encrypted 4. touch /mnt/file2; fsync /mnt/file2 ... Although, newly created dir and file are not related, due to commit bbf156f7afa7 ("f2fs: fix lost xattrs of directories"), we will trigger checkpoint whenever fsync() comes after a new encrypted dir created. In order to avoid such performance regression issue, let's record an entry including directory's ino in global cache whenever we update directory's xattr data, and then triggerring checkpoint() only if xattr metadata of target file's parent was updated. This patch updates to cover below no encryption case as well: 1) parent is checkpointed 2) set_xattr(dir) w/ new xnid 3) create(file) 4) fsync(file) Fixes: bbf156f7afa7 ("f2fs: fix lost xattrs of directories") Reported-by: wangzijie Reported-by: Zhiguo Niu Tested-by: Zhiguo Niu Reported-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ed794b5fefbe..2851c823095b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -139,7 +139,8 @@ TRACE_DEFINE_ENUM(EX_BLOCK_AGE); { CP_NODE_NEED_CP, "node needs cp" }, \ { CP_FASTBOOT_MODE, "fastboot mode" }, \ { CP_SPEC_LOG_NUM, "log type is 2" }, \ - { CP_RECOVER_DIR, "dir needs recovery" }) + { CP_RECOVER_DIR, "dir needs recovery" }, \ + { CP_XATTR_DIR, "dir's xattr updated" }) #define show_shutdown_mode(type) \ __print_symbolic(type, \ -- cgit v1.2.3 From fda70207135b60dd1a7c8f16cc036bb1cc344490 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 11:01:23 +0200 Subject: context_tracking, rcu: Rename rcu_dynticks_curr_cpu_in_eqs() into rcu_is_watching_curr_cpu() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. Note that "watching" is the opposite of "in EQS", so the negation is lifted out of the helper and into the callsites. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index a6c36780cc3b..d53092ffa9db 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -113,13 +113,17 @@ extern void ct_idle_enter(void); extern void ct_idle_exit(void); /* - * Is the current CPU in an extended quiescent state? + * Is RCU watching the current CPU (IOW, it is not in an extended quiescent state)? + * + * Note that this returns the actual boolean data (watching / not watching), + * whereas ct_rcu_watching() returns the RCU_WATCHING subvariable of + * context_tracking.state. * * No ordering, as we are sampling CPU-local information. */ -static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) +static __always_inline bool rcu_is_watching_curr_cpu(void) { - return !(raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING); + return raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING; } /* @@ -140,7 +144,7 @@ static __always_inline bool warn_rcu_enter(void) * lots of the actual reporting also relies on RCU. 
*/ preempt_disable_notrace(); - if (rcu_dynticks_curr_cpu_in_eqs()) { + if (!rcu_is_watching_curr_cpu()) { ret = true; ct_state_inc(CT_RCU_WATCHING); } -- cgit v1.2.3 From 32a9f26e5e266f0116cc8536e5e4544523ddd334 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 29 Apr 2024 16:43:36 +0200 Subject: rcu: Rename rcu_momentary_dyntick_idle() into rcu_momentary_eqs() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, replace "dyntick_idle" into "eqs" to drop the dyntick reference. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index d9ac7b136aea..cf2b5a188f78 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -158,7 +158,7 @@ void rcu_scheduler_starting(void); static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } -static inline void rcu_momentary_dyntick_idle(void) { } +static inline void rcu_momentary_eqs(void) { } static inline void kfree_rcu_scheduler_running(void) { } static inline bool rcu_gp_might_be_stalled(void) { return false; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 254244202ea9..7dbde2b6f714 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,7 +37,7 @@ void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); void rcu_barrier(void); -void rcu_momentary_dyntick_idle(void); +void rcu_momentary_eqs(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); -- cgit v1.2.3 From 4f336dc07eceb77d2164bc1121a5ae6003b19f55 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 22 Apr 2024 13:57:29 +0200 Subject: context_tracking, rcu: Rename rcu_dyntick trace event into rcu_watching The "rcu_dyntick" naming convention has been turned into "rcu_watching" for all helpers now, align the trace event to that. To add to the confusion, the strings passed to the trace event are now reversed: when RCU "starts" the dyntick / EQS state, it "stops" watching. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/trace/events/rcu.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 4066b6d51e46..e81431deaa50 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -466,40 +466,40 @@ TRACE_EVENT(rcu_stall_warning, /* * Tracepoint for dyntick-idle entry/exit events. These take 2 strings * as argument: - * polarity: "Start", "End", "StillNonIdle" for entering, exiting or still not - * being in dyntick-idle mode. + * polarity: "Start", "End", "StillWatching" for entering, exiting or still not + * being in EQS mode. * context: "USER" or "IDLE" or "IRQ". * NMIs nested in IRQs are inferred with nesting > 1 in IRQ context. * * These events also take a pair of numbers, which indicate the nesting * depth before and after the event of interest, and a third number that is - * the ->dynticks counter. Note that task-related and interrupt-related + * the RCU_WATCHING counter. 
Note that task-related and interrupt-related * events use two separate counters, and that the "++=" and "--=" events * for irq/NMI will change the counter by two, otherwise by one. */ -TRACE_EVENT_RCU(rcu_dyntick, +TRACE_EVENT_RCU(rcu_watching, - TP_PROTO(const char *polarity, long oldnesting, long newnesting, int dynticks), + TP_PROTO(const char *polarity, long oldnesting, long newnesting, int counter), - TP_ARGS(polarity, oldnesting, newnesting, dynticks), + TP_ARGS(polarity, oldnesting, newnesting, counter), TP_STRUCT__entry( __field(const char *, polarity) __field(long, oldnesting) __field(long, newnesting) - __field(int, dynticks) + __field(int, counter) ), TP_fast_assign( __entry->polarity = polarity; __entry->oldnesting = oldnesting; __entry->newnesting = newnesting; - __entry->dynticks = dynticks; + __entry->counter = counter; ), TP_printk("%s %lx %lx %#3x", __entry->polarity, __entry->oldnesting, __entry->newnesting, - __entry->dynticks & 0xfff) + __entry->counter & 0xfff) ); /* -- cgit v1.2.3 From 4040b1139904b3f72b56862fc9a8d3e9abb69ffb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 17 Jun 2024 08:51:14 -0700 Subject: context_tracking: Tag context_tracking_enabled_this_cpu() __always_inline Force context_tracking_enabled_this_cpu() to be inlined so that invoking it from guest_context_enter_irqoff(), which KVM uses in non-instrumentable code, doesn't unexpectedly leave a noinstr section. vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1c7: call to context_tracking_enabled_this_cpu() leaves .noinstr.text section vmlinux.o: warning: objtool: svm_vcpu_enter_exit+0x83: call to context_tracking_enabled_this_cpu() leaves .noinstr.text section Note, the CONFIG_CONTEXT_TRACKING_USER=n stub is already __always_inline. Signed-off-by: Sean Christopherson Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 0dbda59c9f37..7b8433d5a8ef 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -113,7 +113,7 @@ static __always_inline bool context_tracking_enabled_cpu(int cpu) return context_tracking_enabled() && per_cpu(context_tracking.active, cpu); } -static inline bool context_tracking_enabled_this_cpu(void) +static __always_inline bool context_tracking_enabled_this_cpu(void) { return context_tracking_enabled() && __this_cpu_read(context_tracking.active); } -- cgit v1.2.3 From a98ae7f045b29de4f48b191d5aeb4e803183d759 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 25 Jul 2024 12:18:40 +0200 Subject: lib/string_choices: Add str_up_down() helper Add str_up_down() helper to return "up" or "down" string literal. Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20240725101841.574-1-michal.wajdeczko@intel.com Signed-off-by: Kees Cook --- include/linux/string_choices.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index d9ebe20229f8..bcde3c9cff81 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -42,6 +42,11 @@ static inline const char *str_yes_no(bool v) return v ? "yes" : "no"; } +static inline const char *str_up_down(bool v) +{ + return v ? 
"up" : "down"; +} + /** * str_plural - Return the simple pluralization based on English counts * @num: Number used for deciding pluralization -- cgit v1.2.3 From f5c1ca3a15fdb867d2b535003f74e0b975eff116 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 12 Aug 2024 11:29:40 -0700 Subject: string_choices: Add wrapper for str_down_up() The string choice functions which are not clearly true/false synonyms also have inverted wrappers. Add this for str_down_up() as well. Suggested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240812182939.work.424-kees@kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: Kees Cook --- include/linux/string_choices.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index bcde3c9cff81..1320bcdcb89c 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -46,6 +46,7 @@ static inline const char *str_up_down(bool v) { return v ? "up" : "down"; } +#define str_down_up(v) str_up_down(!(v)) /** * str_plural - Return the simple pluralization based on English counts -- cgit v1.2.3 From aae6b81260fd9a7224f7eb4fc440d625852245bb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 12 Aug 2024 10:43:48 -0400 Subject: Bluetooth: HCI: Invert LE State quirk to be opt-out rather then opt-in This inverts the LE State quirk so by default we assume the controllers would report valid states rather than invalid which is how quirks normally behave, also this would result in HCI command failing it the LE States are really broken thus exposing the controllers that are really broken in this respect. Link: https://github.com/bluez/bluez/issues/584 Fixes: 220915857e29 ("Bluetooth: Adding driver and quirk defs for multi-role LE") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 17 ++++++++++------- include/net/bluetooth/hci_core.h | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e372a88e8c3f..d1d073089f38 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -206,14 +206,17 @@ enum { */ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - /* When this quirk is set, the controller has validated that - * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are - * valid. This mechanism is necessary as many controllers have - * been seen has having trouble initiating a connectable - * advertisement despite the state combination being reported as - * supported. + /* When this quirk is set, the LE states reported through the + * HCI_LE_READ_SUPPORTED_STATES are invalid/broken. + * + * This mechanism is necessary as many controllers have been seen has + * having trouble initiating a connectable advertisement despite the + * state combination being reported as supported. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. */ - HCI_QUIRK_VALID_LE_STATES, + HCI_QUIRK_BROKEN_LE_STATES, /* When this quirk is set, then erroneous data reporting * is ignored. 
This is mainly due to the fact that the HCI diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 31020891fc68..e449dba698f3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -825,7 +825,7 @@ extern struct mutex hci_cb_list_lock; } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ (hdev->le_states[4] & 0x08) && /* Central */ \ (hdev->le_states[4] & 0x40) && /* Peripheral */ \ (hdev->le_states[3] & 0x10)) /* Simultaneous */ -- cgit v1.2.3 From 648b4bde0aca2980ebc0b90cdfbb80d222370c3d Mon Sep 17 00:00:00 2001 From: Satya Priya Kakitapalli Date: Mon, 12 Aug 2024 10:43:02 +0530 Subject: dt-bindings: clock: qcom: Add GPLL9 support on gcc-sc8180x Add the missing GPLL9 which is required for the gcc sdcc2 clock. Fixes: 0fadcdfdcf57 ("dt-bindings: clock: Add SC8180x GCC binding") Cc: stable@vger.kernel.org Acked-by: Krzysztof Kozlowski Signed-off-by: Satya Priya Kakitapalli Link: https://lore.kernel.org/r/20240812-gcc-sc8180x-fixes-v2-2-8b3eaa5fb856@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-sc8180x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sc8180x.h b/include/dt-bindings/clock/qcom,gcc-sc8180x.h index 487b12c19db5..e364006aa6ea 100644 --- a/include/dt-bindings/clock/qcom,gcc-sc8180x.h +++ b/include/dt-bindings/clock/qcom,gcc-sc8180x.h @@ -248,6 +248,7 @@ #define GCC_USB3_SEC_CLKREF_CLK 238 #define GCC_UFS_MEM_CLKREF_EN 239 #define GCC_UFS_CARD_CLKREF_EN 240 +#define GPLL9 241 #define GCC_EMAC_BCR 0 #define GCC_GPU_BCR 1 -- cgit v1.2.3 From c580e7bfc0cd140b8d3cf73183e08ca8b23326db Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Thu, 8 Aug 2024 21:40:15 +0300 Subject: dt-bindings: arm: qcom,ids: Add IDs for SM7325 family Add Qualcomm SM7325/SM7325P (yupik) SoC IDs. Signed-off-by: Danila Tikhonov Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240808184048.63030-2-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index a213f6244c5d..8332f8d82f96 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -234,11 +234,13 @@ #define QCOM_ID_SA8540P 461 #define QCOM_ID_QCM4290 469 #define QCOM_ID_QCS4290 470 +#define QCOM_ID_SM7325 475 #define QCOM_ID_SM8450_2 480 #define QCOM_ID_SM8450_3 482 #define QCOM_ID_SC7280 487 #define QCOM_ID_SC7180P 495 #define QCOM_ID_QCM6490 497 +#define QCOM_ID_SM7325P 499 #define QCOM_ID_IPQ5000 503 #define QCOM_ID_IPQ0509 504 #define QCOM_ID_IPQ0518 505 -- cgit v1.2.3 From 015dff12dfdeb8d94115ec829bc2e4b711075935 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 14 Aug 2024 18:20:22 +0200 Subject: dt-bindings: clock: gcc-msm8998: Add Q6 and LPASS clocks definitions Add definitions for the Q6 BIMC, LPASS core and adsp smmu clocks, required to enable audio functionality on MSM8998. Add the GDSC definitions for the LPASS_ADSP_GDSC and LPASS_CORE_GDSC as a final step to enable the required clock tree for the lpass iommu and for the audio dsp itself. 
Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Marc Gonzalez Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240814-lpass-v1-1-a5bb8f9dfa8b@freebox.fr Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-msm8998.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-msm8998.h b/include/dt-bindings/clock/qcom,gcc-msm8998.h index b5456a64d421..5b0dde080900 100644 --- a/include/dt-bindings/clock/qcom,gcc-msm8998.h +++ b/include/dt-bindings/clock/qcom,gcc-msm8998.h @@ -193,10 +193,15 @@ #define GCC_MMSS_GPLL0_DIV_CLK 184 #define GCC_GPU_GPLL0_DIV_CLK 185 #define GCC_GPU_GPLL0_CLK 186 +#define HLOS1_VOTE_LPASS_CORE_SMMU_CLK 187 +#define HLOS1_VOTE_LPASS_ADSP_SMMU_CLK 188 +#define GCC_MSS_Q6_BIMC_AXI_CLK 189 #define PCIE_0_GDSC 0 #define UFS_GDSC 1 #define USB_30_GDSC 2 +#define LPASS_ADSP_GDSC 3 +#define LPASS_CORE_GDSC 4 #define GCC_BLSP1_QUP1_BCR 0 #define GCC_BLSP1_QUP2_BCR 1 -- cgit v1.2.3 From a500427c84d1c7c59ebef6a70895fdf28ddb0884 Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Tue, 30 Jul 2024 11:18:13 +0530 Subject: dt-bindings: interconnect: Add Qualcomm IPQ5332 support Add interconnect-cells to clock provider so that it can be used as icc provider. Add master/slave ids for Qualcomm IPQ5332 Network-On-Chip interfaces. This will be used by the gcc-ipq5332 driver for providing interconnect services using the icc-clk framework. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20240730054817.1915652-2-quic_varada@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/interconnect/qcom,ipq5332.h | 46 +++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,ipq5332.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,ipq5332.h b/include/dt-bindings/interconnect/qcom,ipq5332.h new file mode 100644 index 000000000000..16475bb07a48 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,ipq5332.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +#ifndef INTERCONNECT_QCOM_IPQ5332_H +#define INTERCONNECT_QCOM_IPQ5332_H + +#define MASTER_SNOC_PCIE3_1_M 0 +#define SLAVE_SNOC_PCIE3_1_M 1 +#define MASTER_ANOC_PCIE3_1_S 2 +#define SLAVE_ANOC_PCIE3_1_S 3 +#define MASTER_SNOC_PCIE3_2_M 4 +#define SLAVE_SNOC_PCIE3_2_M 5 +#define MASTER_ANOC_PCIE3_2_S 6 +#define SLAVE_ANOC_PCIE3_2_S 7 +#define MASTER_SNOC_USB 8 +#define SLAVE_SNOC_USB 9 +#define MASTER_NSSNOC_NSSCC 10 +#define SLAVE_NSSNOC_NSSCC 11 +#define MASTER_NSSNOC_SNOC_0 12 +#define SLAVE_NSSNOC_SNOC_0 13 +#define MASTER_NSSNOC_SNOC_1 14 +#define SLAVE_NSSNOC_SNOC_1 15 +#define MASTER_NSSNOC_ATB 16 +#define SLAVE_NSSNOC_ATB 17 +#define MASTER_NSSNOC_PCNOC_1 18 +#define SLAVE_NSSNOC_PCNOC_1 19 +#define MASTER_NSSNOC_QOSGEN_REF 20 +#define SLAVE_NSSNOC_QOSGEN_REF 21 +#define MASTER_NSSNOC_TIMEOUT_REF 22 +#define SLAVE_NSSNOC_TIMEOUT_REF 23 +#define MASTER_NSSNOC_XO_DCD 24 +#define SLAVE_NSSNOC_XO_DCD 25 + +#define MASTER_NSSNOC_PPE 0 +#define SLAVE_NSSNOC_PPE 1 +#define MASTER_NSSNOC_PPE_CFG 2 +#define SLAVE_NSSNOC_PPE_CFG 3 +#define MASTER_NSSNOC_NSS_CSR 4 +#define SLAVE_NSSNOC_NSS_CSR 5 +#define MASTER_NSSNOC_CE_APB 6 +#define SLAVE_NSSNOC_CE_APB 7 +#define MASTER_NSSNOC_CE_AXI 8 +#define SLAVE_NSSNOC_CE_AXI 9 + +#define MASTER_CNOC_AHB 0 +#define SLAVE_CNOC_AHB 1 + +#endif /* INTERCONNECT_QCOM_IPQ5332_H */ -- cgit v1.2.3 From 
de67763cbdbba8149eeb5d45ad0c6834f7c7b352 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 9 Aug 2024 16:54:06 -0700 Subject: ip: Move INFINITY_LIFE_TIME to addrconf.h. INFINITY_LIFE_TIME is the common value used in IPv4 and IPv6 but defined in both .c files. Also, 0xffffffff used in addrconf_timeout_fixup() is INFINITY_LIFE_TIME. Let's move INFINITY_LIFE_TIME's definition to addrconf.h Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240809235406.50187-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/addrconf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b18e81f0c9e1..c8ed31828db3 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -187,10 +187,12 @@ static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) return 0; } +#define INFINITY_LIFE_TIME 0xFFFFFFFF + static inline unsigned long addrconf_timeout_fixup(u32 timeout, unsigned int unit) { - if (timeout == 0xffffffff) + if (timeout == INFINITY_LIFE_TIME) return ~0UL; /* -- cgit v1.2.3 From 0cb70ee4a6ee6b0a4b0e0f70a64a094c6fe05944 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 14 Aug 2024 13:22:25 +0800 Subject: virtio: rename virtio_config_enabled to virtio_config_core_enabled Following patch will allow the config interrupt to be disabled by a specific driver via another boolean. So this patch renames virtio_config_enabled and relevant helpers to virtio_config_core_enabled. Cc: Venkat Venkatsubra Cc: Gia-Khanh Nguyen Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Link: https://patch.msgid.link/20240814052228.4654-2-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/virtio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4b16844c6bc2..de6d3cc176d9 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -118,7 +118,7 @@ struct virtio_admin_cmd { * struct virtio_device - representation of a device using virtio * @index: unique position on the virtio bus * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) - * @config_enabled: configuration change reporting enabled + * @config_core_enabled: configuration change reporting enabled by core * @config_change_pending: configuration change reported while disabled * @config_lock: protects configuration change reporting * @vqs_list_lock: protects @vqs. @@ -135,7 +135,7 @@ struct virtio_admin_cmd { struct virtio_device { int index; bool failed; - bool config_enabled; + bool config_core_enabled; bool config_change_pending; spinlock_t config_lock; spinlock_t vqs_list_lock; -- cgit v1.2.3 From 224de6f886f8184fd448700a6d78f216694de948 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 14 Aug 2024 13:22:26 +0800 Subject: virtio: allow driver to disable the configure change notification Sometime, it would be useful to disable the configure change notification from the driver. So this patch allows this by introducing a variable config_change_driver_disabled and only allow the configure change notification callback to be triggered when it is allowed by both the virtio core and the driver. It is set to false by default to hold the current semantic so we don't need to change any drivers. The first user for this would be virtio-net. Cc: Venkat Venkatsubra Cc: Gia-Khanh Nguyen Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang Link: https://patch.msgid.link/20240814052228.4654-3-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/virtio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index de6d3cc176d9..306137a15d07 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -119,6 +119,8 @@ struct virtio_admin_cmd { * @index: unique position on the virtio bus * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) * @config_core_enabled: configuration change reporting enabled by core + * @config_driver_disabled: configuration change reporting disabled by + * a driver * @config_change_pending: configuration change reported while disabled * @config_lock: protects configuration change reporting * @vqs_list_lock: protects @vqs. @@ -136,6 +138,7 @@ struct virtio_device { int index; bool failed; bool config_core_enabled; + bool config_driver_disabled; bool config_change_pending; spinlock_t config_lock; spinlock_t vqs_list_lock; @@ -166,6 +169,10 @@ void __virtqueue_break(struct virtqueue *_vq); void __virtqueue_unbreak(struct virtqueue *_vq); void virtio_config_changed(struct virtio_device *dev); + +void virtio_config_driver_disable(struct virtio_device *dev); +void virtio_config_driver_enable(struct virtio_device *dev); + #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); -- cgit v1.2.3 From 5f75cfbd6bb02295ddaed48adf667b6c828ce07b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 1 Aug 2024 22:47:48 +0200 Subject: mm/hugetlb: fix hugetlb vs. core-mm PT locking We recently made GUP's common page table walking code to also walk hugetlb VMAs without most hugetlb special-casing, preparing for the future of having less hugetlb-specific page table walking code in the codebase. Turns out that we missed one page table locking detail: page table locking for hugetlb folios that are not mapped using a single PMD/PUD. Assume we have hugetlb folio that spans multiple PTEs (e.g., 64 KiB hugetlb folios on arm64 with 4 KiB base page size). GUP, as it walks the page tables, will perform a pte_offset_map_lock() to grab the PTE table lock. However, hugetlb that concurrently modifies these page tables would actually grab the mm->page_table_lock: with USE_SPLIT_PTE_PTLOCKS, the locks would differ. Something similar can happen right now with hugetlb folios that span multiple PMDs when USE_SPLIT_PMD_PTLOCKS. This issue can be reproduced [1], for example triggering: [ 3105.936100] ------------[ cut here ]------------ [ 3105.939323] WARNING: CPU: 31 PID: 2732 at mm/gup.c:142 try_grab_folio+0x11c/0x188 [ 3105.944634] Modules linked in: [...] 
[ 3105.974841] CPU: 31 PID: 2732 Comm: reproducer Not tainted 6.10.0-64.eln141.aarch64 #1 [ 3105.980406] Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-4.fc40 05/24/2024 [ 3105.986185] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 3105.991108] pc : try_grab_folio+0x11c/0x188 [ 3105.994013] lr : follow_page_pte+0xd8/0x430 [ 3105.996986] sp : ffff80008eafb8f0 [ 3105.999346] x29: ffff80008eafb900 x28: ffffffe8d481f380 x27: 00f80001207cff43 [ 3106.004414] x26: 0000000000000001 x25: 0000000000000000 x24: ffff80008eafba48 [ 3106.009520] x23: 0000ffff9372f000 x22: ffff7a54459e2000 x21: ffff7a546c1aa978 [ 3106.014529] x20: ffffffe8d481f3c0 x19: 0000000000610041 x18: 0000000000000001 [ 3106.019506] x17: 0000000000000001 x16: ffffffffffffffff x15: 0000000000000000 [ 3106.024494] x14: ffffb85477fdfe08 x13: 0000ffff9372ffff x12: 0000000000000000 [ 3106.029469] x11: 1fffef4a88a96be1 x10: ffff7a54454b5f0c x9 : ffffb854771b12f0 [ 3106.034324] x8 : 0008000000000000 x7 : ffff7a546c1aa980 x6 : 0008000000000080 [ 3106.038902] x5 : 00000000001207cf x4 : 0000ffff9372f000 x3 : ffffffe8d481f000 [ 3106.043420] x2 : 0000000000610041 x1 : 0000000000000001 x0 : 0000000000000000 [ 3106.047957] Call trace: [ 3106.049522] try_grab_folio+0x11c/0x188 [ 3106.051996] follow_pmd_mask.constprop.0.isra.0+0x150/0x2e0 [ 3106.055527] follow_page_mask+0x1a0/0x2b8 [ 3106.058118] __get_user_pages+0xf0/0x348 [ 3106.060647] faultin_page_range+0xb0/0x360 [ 3106.063651] do_madvise+0x340/0x598 Let's make huge_pte_lockptr() effectively use the same PT locks as any core-mm page table walker would. Add ptep_lockptr() to obtain the PTE page table lock using a pte pointer -- unfortunately we cannot convert pte_lockptr() because virt_to_page() doesn't work with kmap'ed page tables we can have with CONFIG_HIGHPTE. Handle CONFIG_PGTABLE_LEVELS correctly by checking in reverse order, such that when e.g., CONFIG_PGTABLE_LEVELS==2 with PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE will work as expected. Document why that works. There is one ugly case: powerpc 8xx, whereby we have an 8 MiB hugetlb folio being mapped using two PTE page tables. While hugetlb wants to take the PMD table lock, core-mm would grab the PTE table lock of one of both PTE page tables. In such corner cases, we have to make sure that both locks match, which is (fortunately!) currently guaranteed for 8xx as it does not support SMP and consequently doesn't use split PT locks. 
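For reference, a minimal sketch, not taken from the patch itself, of what the consistent lock selection buys: with the fix, the hugetlb side (via the existing huge_pte_lock() wrapper around huge_pte_lockptr()) and a generic walker doing pte_offset_map_lock() serialize on the same split PTE lock for a PTE-mapped hugetlb folio, instead of hugetlb quietly falling back to mm->page_table_lock:

  /* Illustrative only; error handling and the actual PTE updates are elided. */
  static void sketch_update_hugetlb_pte(struct hstate *h, struct mm_struct *mm,
                                        pte_t *ptep)
  {
          /* for sub-PMD hugetlb folios this now resolves to ptep_lockptr() */
          spinlock_t *ptl = huge_pte_lock(h, mm, ptep);

          /* ... modify the PTEs backing the folio ... */

          spin_unlock(ptl);
  }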
[1] https://lore.kernel.org/all/1bbfcc7f-f222-45a5-ac44-c5a1381c596d@redhat.com/ Link: https://lkml.kernel.org/r/20240801204748.99107-1-david@redhat.com Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code") Signed-off-by: David Hildenbrand Acked-by: Peter Xu Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Peter Xu Cc: Oscar Salvador Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 33 ++++++++++++++++++++++++++++++--- include/linux/mm.h | 11 +++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c9bf68c239a0..45bf05ad5c53 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason) static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { - if (huge_page_size(h) == PMD_SIZE) + const unsigned long size = huge_page_size(h); + + VM_WARN_ON(size == PAGE_SIZE); + + /* + * hugetlb must use the exact same PT locks as core-mm page table + * walkers would. When modifying a PTE table, hugetlb must take the + * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD + * PT lock etc. + * + * The expectation is that any hugetlb folio smaller than a PMD is + * always mapped into a single PTE table and that any hugetlb folio + * smaller than a PUD (but at least as big as a PMD) is always mapped + * into a single PMD table. + * + * If that does not hold for an architecture, then that architecture + * must disable split PT locks such that all *_lockptr() functions + * will give us the same result: the per-MM PT lock. + * + * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where + * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() + * and core-mm would use pmd_lockptr(). However, in such configurations + * split PMD locks are disabled -- they don't make sense on a single + * PGDIR page table -- and the end result is the same. 
+ */ + if (size >= PUD_SIZE) + return pud_lockptr(mm, (pud_t *) pte); + else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); - return &mm->page_table_lock; + /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ + return ptep_lockptr(mm, pte); } #ifndef hugepages_supported diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..6549d0979b28 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE)); + BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE); + return ptlock_ptr(virt_to_ptdesc(pte)); +} + static inline bool ptlock_init(struct ptdesc *ptdesc) { /* @@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} -- cgit v1.2.3 From f4cb78af91e3b2b7aa76dbf8213b898fa8811b12 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:35 +0000 Subject: mm: add system wide stats items category /proc/vmstat contains events and stats, events can only grow, but stats can grow and shrink. vmstat has the following: ------------------------- NR_VM_ZONE_STAT_ITEMS: per-zone stats NR_VM_NUMA_EVENT_ITEMS: per-numa events NR_VM_NODE_STAT_ITEMS: per-numa stats NR_VM_WRITEBACK_STAT_ITEMS: system-wide background-writeback and dirty-throttling tresholds. NR_VM_EVENT_ITEMS: system-wide events ------------------------- Rename NR_VM_WRITEBACK_STAT_ITEMS to NR_VM_STAT_ITEMS, to track the system-wide stats, we are going to add per-page metadata stats to this category in the next patch. Also delete unused writeback_stat_name(). 
Link: https://lkml.kernel.org/r/20240809191020.1142142-2-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-3-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Suggested-by: Yosry Ahmed Tested-by: Alison Schofield Acked-by: David Hildenbrand Acked-by: David Rientjes Cc: Dan Williams Cc: Domenico Cerasuolo Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Muchun Song Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yi Zhang Cc: Fan Ni Signed-off-by: Andrew Morton --- include/linux/vmstat.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 23cd17942036..9ab4fa5e09b5 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -34,10 +34,11 @@ struct reclaim_stat { unsigned nr_lazyfree_fail; }; -enum writeback_stat_item { +/* Stat data for system wide items */ +enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, + NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -514,21 +515,13 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -static inline const char *writeback_stat_name(enum writeback_stat_item item) -{ - return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_EVENT_ITEMS + - NR_VM_NODE_STAT_ITEMS + - item]; -} - #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + - NR_VM_WRITEBACK_STAT_ITEMS + + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ -- cgit v1.2.3 From 9d85731110241fb8ca9445ea4177d816041a8825 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:36 +0000 Subject: mm: don't account memmap per-node Fix invalid access to pgdat during hot-remove operation: ndctl users reported a GPF when trying to destroy a namespace: $ ndctl destroy-namespace all -r all -f Segmentation fault dmesg: Oops: general protection fault, probably for non-canonical address 0xdffffc0000005650: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: probably user-memory-access in range [0x000000000002b280-0x000000000002b287] CPU: 26 UID: 0 PID: 1868 Comm: ndctl Not tainted 6.11.0-rc1 #1 Hardware name: Dell Inc. PowerEdge R640/08HT8T, BIOS 2.20.1 09/13/2023 RIP: 0010:mod_node_page_state+0x2a/0x110 cxl-test users report a GPF when trying to unload the test module: $ modrpobe -r cxl-test dmesg BUG: unable to handle page fault for address: 0000000000004200 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 UID: 0 PID: 1076 Comm: modprobe Tainted: G O N 6.11.0-rc1 #197 Tainted: [O]=OOT_MODULE, [N]=TEST Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/15 RIP: 0010:mod_node_page_state+0x6/0x90 Currently, when memory is hot-plugged or hot-removed the accounting is done based on the assumption that memmap is allocated from the same node as the hot-plugged/hot-removed memory, which is not always the case. 
In addition, there are challenges with keeping the node id of the memory that is being remove to the time when memmap accounting is actually performed: since this is done after remove_pfn_range_from_zone(), and also after remove_memory_block_devices(). Meaning that we cannot use pgdat nor walking though memblocks to get the nid. Given all of that, account the memmap overhead system wide instead. For this we are going to be using global atomic counters, but given that memmap size is rarely modified, and normally is only modified either during early boot when there is only one CPU, or under a hotplug global mutex lock, therefore there is no need for per-cpu optimizations. Also, while we are here rename nr_memmap to nr_memmap_pages, and nr_memmap_boot to nr_memmap_boot_pages to be self explanatory that the units are in page count. [pasha.tatashin@soleen.com: address a few nits from David Hildenbrand] Link: https://lkml.kernel.org/r/20240809191020.1142142-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240809191020.1142142-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-4-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Reported-by: Yi Zhang Closes: https://lore.kernel.org/linux-cxl/CAHj4cs9Ax1=CoJkgBGP_+sNu6-6=6v=_L-ZBZY0bVLD3wUWZQg@mail.gmail.com Reported-by: Alison Schofield Closes: https://lore.kernel.org/linux-mm/Zq0tPd2h6alFz8XF@aschofie-mobl2/#t Tested-by: Dan Williams Tested-by: Alison Schofield Acked-by: David Hildenbrand Acked-by: David Rientjes Tested-by: Yi Zhang Cc: Domenico Cerasuolo Cc: Fan Ni Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Muchun Song Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 -- include/linux/vmstat.h | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41458892bc8a..1dc6248feb83 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,8 +220,6 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, - NR_MEMMAP, /* page metadata allocated through buddy allocator */ - NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9ab4fa5e09b5..9eb77c9007e6 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -38,6 +38,8 @@ struct reclaim_stat { enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, + NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ + NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ NR_VM_STAT_ITEMS, }; @@ -618,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio, lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -void __meminit mod_node_early_perpage_metadata(int nid, long delta); -void __meminit store_early_perpage_metadata(void); - +void memmap_boot_pages_add(long delta); +void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */ -- cgit v1.2.3 From a8fc28dad6d574582cdf2f7e78c73c59c623df30 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 13 Aug 2024 08:07:56 -0700 Subject: alloc_tag: introduce clear_page_tag_ref() helper function In several cases we are freeing pages which were not allocated using common page allocators. 
For such cases, in order to keep allocation accounting correct, we should clear the page tag to indicate that the page being freed is expected to not have a valid allocation tag. Introduce clear_page_tag_ref() helper function to be used for this. Link: https://lkml.kernel.org/r/20240813150758.855881-1-surenb@google.com Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty") Signed-off-by: Suren Baghdasaryan Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Pasha Tatashin Cc: Kees Cook Cc: Kent Overstreet Cc: Sourav Panda Cc: Vlastimil Babka Cc: [6.10] Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 18cd0c0c73d9..207f0c83c8e9 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref) page_ext_put(page_ext_from_codetag_ref(ref)); } +static inline void clear_page_tag_ref(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } +} + static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { @@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -- cgit v1.2.3 From 67c3ca2c5cfe6a50772514e3349b5e7b3b0fac03 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:02 +0300 Subject: net: mscc: ocelot: use ocelot_xmit_get_vlan_info() also for FDMA and register injection Problem description ------------------- On an NXP LS1028A (felix DSA driver) with the following configuration: - ocelot-8021q tagging protocol - VLAN-aware bridge (with STP) spanning at least swp0 and swp1 - 8021q VLAN upper interfaces on swp0 and swp1: swp0.700, swp1.700 - ptp4l on swp0.700 and swp1.700 we see that the ptp4l instances do not see each other's traffic, and they all go to the grand master state due to the ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES condition. Jumping to the conclusion for the impatient ------------------------------------------- There is a zero-day bug in the ocelot switchdev driver in the way it handles VLAN-tagged packet injection. The correct logic already exists in the source code, in function ocelot_xmit_get_vlan_info() added by commit 5ca721c54d86 ("net: dsa: tag_ocelot: set the classified VLAN during xmit"). But it is used only for normal NPI-based injection with the DSA "ocelot" tagging protocol. The other injection code paths (register-based and FDMA-based) roll their own wrong logic. This affects and was noticed on the DSA "ocelot-8021q" protocol because it uses register-based injection. By moving ocelot_xmit_get_vlan_info() to a place that's common for both the DSA tagger and the ocelot switch library, it can also be called from ocelot_port_inject_frame() in ocelot.c. 
We need to touch the lines with ocelot_ifh_port_set()'s prototype anyway, so let's rename it to something clearer regarding what it does, and add a kernel-doc. ocelot_ifh_set_basic() should do. Investigation notes ------------------- Debugging reveals that PTP event (aka those carrying timestamps, like Sync) frames injected into swp0.700 (but also swp1.700) hit the wire with two VLAN tags: 00000000: 01 1b 19 00 00 00 00 01 02 03 04 05 81 00 02 bc ~~~~~~~~~~~ 00000010: 81 00 02 bc 88 f7 00 12 00 2c 00 00 02 00 00 00 ~~~~~~~~~~~ 00000020: 00 00 00 00 00 00 00 00 00 00 00 01 02 ff fe 03 00000030: 04 05 00 01 00 04 00 00 00 00 00 00 00 00 00 00 00000040: 00 00 The second (unexpected) VLAN tag makes felix_check_xtr_pkt() -> ptp_classify_raw() fail to see these as PTP packets at the link partner's receiving end, and return PTP_CLASS_NONE (because the BPF classifier is not written to expect 2 VLAN tags). The reason why packets have 2 VLAN tags is because the transmission code treats VLAN incorrectly. Neither ocelot switchdev, nor felix DSA, declare the NETIF_F_HW_VLAN_CTAG_TX feature. Therefore, at xmit time, all VLANs should be in the skb head, and none should be in the hwaccel area. This is done by: static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; } But ocelot_port_inject_frame() handles things incorrectly: ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); void ocelot_ifh_port_set(struct sk_buff *skb, void *ifh, int port, u32 rew_op) { (...) if (vlan_tag) ocelot_ifh_set_vlan_tci(ifh, vlan_tag); (...) } The way __vlan_hwaccel_push_inside() pushes the tag inside the skb head is by calling: static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { skb->vlan_present = 0; } which does _not_ zero out skb->vlan_tci as seen by skb_vlan_tag_get(). This means that ocelot, when it calls skb_vlan_tag_get(), sees (and uses) a residual skb->vlan_tci, while the same VLAN tag is _already_ in the skb head. The trivial fix for double VLAN headers is to replace the content of ocelot_ifh_port_set() with: if (skb_vlan_tag_present(skb)) ocelot_ifh_set_vlan_tci(ifh, skb_vlan_tag_get(skb)); but this would not be correct either, because, as mentioned, vlan_hw_offload_capable() is false for us, so we'd be inserting dead code and we'd always transmit packets with VID=0 in the injection frame header. I can't actually test the ocelot switchdev driver and rely exclusively on code inspection, but I don't think traffic from 8021q uppers has ever been injected properly, and not double-tagged. Thus I'm blaming the introduction of VLAN fields in the injection header - early driver code. As hinted at in the early conclusion, what we _want_ to happen for VLAN transmission was already described once in commit 5ca721c54d86 ("net: dsa: tag_ocelot: set the classified VLAN during xmit"). ocelot_xmit_get_vlan_info() intends to ensure that if the port through which we're transmitting is under a VLAN-aware bridge, the outer VLAN tag from the skb head is stripped from there and inserted into the injection frame header (so that the packet is processed in hardware through that actual VLAN). And in all other cases, the packet is sent with VID=0 in the injection frame header, since the port is VLAN-unaware and has logic to strip this VID on egress (making it invisible to the wire). 
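A hedged sketch of how the injection path is expected to consume the now-shared helper (the ocelot.c hunk itself is outside include/ and therefore not shown in this digest); the IFH field setters are the ones the ocelot tagger already uses, and the bridge lookup is left to the caller:

  static void sketch_ifh_fill_vlan(void *ifh, struct sk_buff *skb,
                                   struct net_device *bridge_dev)
  {
          u64 vlan_tci, tag_type;

          /* classify from the VLAN in the skb head, not from a stale
           * skb->vlan_tci left behind by __vlan_hwaccel_push_inside()
           */
          ocelot_xmit_get_vlan_info(skb, bridge_dev, &vlan_tci, &tag_type);

          ocelot_ifh_set_vlan_tci(ifh, vlan_tci);
          ocelot_ifh_set_tag_type(ifh, tag_type);
  }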
Fixes: 08d02364b12f ("net: mscc: fix the injection header") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/ocelot.h | 47 ++++++++++++++++++++++++++++++++++++++++++++++ include/soc/mscc/ocelot.h | 3 ++- 2 files changed, 49 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index dca2969015d8..6fbfbde68a37 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -5,6 +5,8 @@ #ifndef _NET_DSA_TAG_OCELOT_H #define _NET_DSA_TAG_OCELOT_H +#include +#include #include #include #include @@ -273,4 +275,49 @@ static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) return rew_op; } +/** + * ocelot_xmit_get_vlan_info: Determine VLAN_TCI and TAG_TYPE for injected frame + * @skb: Pointer to socket buffer + * @br: Pointer to bridge device that the port is under, if any + * @vlan_tci: + * @tag_type: + * + * If the port is under a VLAN-aware bridge, remove the VLAN header from the + * payload and move it into the DSA tag, which will make the switch classify + * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, + * which is the pvid of standalone ports (OCELOT_STANDALONE_PVID), although not + * of VLAN-unaware bridge ports (that would be ocelot_vlan_unaware_pvid()). + * Anyway, VID 0 is fine because it is stripped on egress for these port modes, + * and source address learning is not performed for packets injected from the + * CPU anyway, so it doesn't matter that the VID is "wrong". + */ +static inline void ocelot_xmit_get_vlan_info(struct sk_buff *skb, + struct net_device *br, + u64 *vlan_tci, u64 *tag_type) +{ + struct vlan_ethhdr *hdr; + u16 proto, tci; + + if (!br || !br_vlan_enabled(br)) { + *vlan_tci = 0; + *tag_type = IFH_TAG_TYPE_C; + return; + } + + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + br_vlan_get_proto(br, &proto); + + if (ntohs(hdr->h_vlan_proto) == proto) { + vlan_remove_tag(skb, &tci); + *vlan_tci = tci; + } else { + rcu_read_lock(); + br_vlan_get_pvid_rcu(br, &tci); + rcu_read_unlock(); + *vlan_tci = tci; + } + + *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; +} + #endif diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6a37b29f4b4c..ed18e6bafc8d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -969,7 +969,8 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag); +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb); int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, -- cgit v1.2.3 From c5e12ac3beb0dd3a718296b2d8af5528e9ab728e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:04 +0300 Subject: net: mscc: ocelot: serialize access to the injection/extraction groups As explained by Horatiu Vultur in commit 603ead96582d ("net: sparx5: Add spinlock for frame transmission from CPU") which is for a similar hardware design, multiple CPUs can simultaneously perform injection or extraction. 
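A hedged usage sketch of the locking helpers this change declares (see the hunk below); the felix/ocelot call sites themselves live outside include/ and are simplified here:

  static int sketch_xmit_one(struct ocelot *ocelot, int port, int grp,
                             u32 rew_op, struct sk_buff *skb)
  {
          int err = 0;

          ocelot_lock_inj_grp(ocelot, grp);

          if (ocelot_can_inject(ocelot, grp))
                  ocelot_port_inject_frame(ocelot, port, grp, rew_op, skb);
          else
                  err = -EBUSY;

          ocelot_unlock_inj_grp(ocelot, grp);

          return err;
  }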
There are only 2 register groups for injection and 2 for extraction, and the driver only uses one of each. So we'd better serialize access using spin locks, otherwise frame corruption is possible. Note that unlike in sparx5, FDMA in ocelot does not have this issue because struct ocelot_fdma_tx_ring already contains an xmit_lock. I guess this is mostly a problem for NXP LS1028A, as that is dual core. I don't think VSC7514 is. So I'm blaming the commit where LS1028A (aka the felix DSA driver) started using register-based packet injection and extraction. Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index ed18e6bafc8d..462c653e1017 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -813,6 +813,9 @@ struct ocelot { const u32 *const *map; struct list_head stats_regions; + spinlock_t inj_lock; + spinlock_t xtr_lock; + u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; int packet_buffer_size; int num_frame_refs; @@ -966,6 +969,12 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, u32 val, u32 reg, u32 offset); /* Packet I/O */ +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp); bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); -- cgit v1.2.3 From 93e4649efa964201c73b0a03c35c04a0d6fc809f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:05 +0300 Subject: net: dsa: provide a software untagging function on RX for VLAN-aware bridges Through code analysis, I realized that the ds->untag_bridge_pvid logic is contradictory - see the newly added FIXME above the kernel-doc for dsa_software_untag_vlan_unaware_bridge(). Moreover, for the Felix driver, I need something very similar, but which is actually _not_ contradictory: untag the bridge PVID on RX, but for VLAN-aware bridges. The existing logic does it for VLAN-unaware bridges. Since I don't want to change the functionality of drivers which were supposedly properly tested with the ds->untag_bridge_pvid flag, I have introduced a new one: ds->untag_vlan_aware_bridge_pvid, and I have refactored the DSA reception code into a common path for both flags. TODO: both flags should be unified under a single ds->software_vlan_untag, which users of both current flags should set. This is not something that can be carried out right away. It needs very careful examination of all drivers which make use of this functionality, since some of them actually get this wrong in the first place. For example, commit 9130c2d30c17 ("net: dsa: microchip: ksz8795: Use software untagging on CPU port") uses this in a driver which has ds->configure_vlan_while_not_filtering = true. The latter mechanism has been known for many years to be broken by design: https://lore.kernel.org/netdev/CABumfLzJmXDN_W-8Z=p9KyKUVi_HhS7o_poBkeKHS2BkAiyYpw@mail.gmail.com/ and we have the situation of 2 bugs canceling each other. 
There is no private VLAN, and the port follows the PVID of the VLAN-unaware bridge. So, it's kinda ok for that driver to use the ds->untag_bridge_pvid mechanism, in a broken way. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index b06f97ae3da1..d7a6c2930277 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -403,14 +403,18 @@ struct dsa_switch { */ u32 configure_vlan_while_not_filtering:1; - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the - * bridge's default_pvid VLAN tagged frames to offer a consistent - * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge - * device. + /* Pop the default_pvid of VLAN-unaware bridge ports from tagged frames. + * DEPRECATED: Do NOT set this field in new drivers. Instead look at + * the dsa_software_vlan_untag() comments. */ u32 untag_bridge_pvid:1; + /* Pop the default_pvid of VLAN-aware bridge ports from tagged frames. + * Useful if the switch cannot preserve the VLAN tag as seen on the + * wire for user port ingress, and chooses to send all frames as + * VLAN-tagged to the CPU, including those which were originally + * untagged. + */ + u32 untag_vlan_aware_bridge_pvid:1; /* Let DSA manage the FDB entries towards the * CPU, based on the software bridge database. -- cgit v1.2.3 From 36dd1141be70b5966906919714dc504a24c65ddf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:07 +0300 Subject: net: mscc: ocelot: treat 802.1ad tagged traffic as 802.1Q-untagged I was revisiting the topic of 802.1ad treatment in the Ocelot switch [0] and realized that not only is its basic VLAN classification pipeline improper for offloading vlan_protocol 802.1ad bridges, but also improper for offloading regular 802.1Q bridges already. Namely, 802.1ad-tagged traffic should be treated as VLAN-untagged by bridged ports, but this switch treats it as if it was 802.1Q-tagged with the same VID as in the 802.1ad header. This is markedly different to what the Linux bridge expects; see the "other_tpid()" function in tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh. An idea came to me that the VCAP IS1 TCAM is more powerful than I'm giving it credit for, and that it actually overwrites the classified VID before the VLAN Table lookup takes place. In other words, it can be used even to save a packet from being dropped on ingress due to VLAN membership. Add a sophisticated TCAM rule hardcoded into the driver to force the switch to behave like a Linux bridge with vlan_filtering 1 vlan_protocol 802.1Q. Regarding the lifetime of the filter: eventually the bridge will disappear, and vlan_filtering on the port will be restored to 0 for standalone mode. Then the filter will be deleted. [0]: https://lore.kernel.org/netdev/20201009122947.nvhye4hvcha3tljh@skbuf/ Fixes: 7142529f1688 ("net: mscc: ocelot: add VLAN filtering") Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/soc/mscc/ocelot_vcap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index c601a4598b0d..eb19668a06db 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -13,6 +13,7 @@ */ #define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, upstream) ((upstream) << 16 | (port)) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2) @@ -499,6 +500,7 @@ struct ocelot_vcap_key_vlan { struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ enum ocelot_vcap_bit dei; /* DEI */ enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ + enum ocelot_vcap_bit tpid; }; struct ocelot_vcap_key_etype { -- cgit v1.2.3 From 3942bb49728ad9e1f94d953a88af169a8f5d8099 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 14 Aug 2024 13:00:34 +0300 Subject: string: add mem_is_zero() helper to check if memory area is all zeros Almost two thirds of the memchr_inv() usages check if the memory area is all zeros, with no interest in where in the buffer the first non-zero byte is located. Checking for !memchr_inv(s, 0, n) is also not very intuitive or discoverable. Add an explicit mem_is_zero() helper for this use case. Reviewed-by: Kees Cook Reviewed-by: Andy Shevchenko Link: https://patchwork.freedesktop.org/patch/msgid/20240814100035.3100852-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/linux/string.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 9edace076ddb..5855c5626b4b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -279,6 +279,18 @@ static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *str, char old, char new); +/** + * mem_is_zero - Check if an area of memory is all 0's. + * @s: The memory area + * @n: The size of the area + * + * Return: True if the area of memory is all 0's. + */ +static inline bool mem_is_zero(const void *s, size_t n) +{ + return !memchr_inv(s, 0, n); +} + extern void kfree_const(const void *x); extern char *kstrdup(const char *s, gfp_t gfp) __malloc; -- cgit v1.2.3 From 6e6f58a170ea98e44075b761f2da42a5aec47dfb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Aug 2024 16:29:11 +0200 Subject: thermal: gov_bang_bang: Use governor_data to reduce overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After running once, the for_each_trip_desc() loop in bang_bang_manage() is pure needless overhead because it is not going to make any changes unless a new cooling device has been bound to one of the trips in the thermal zone or the system is resuming from sleep. For this reason, make bang_bang_manage() set governor_data for the thermal zone and check it upfront to decide whether or not it needs to do anything. However, governor_data needs to be reset in some cases to let bang_bang_manage() know that it should walk the trips again, so add an .update_tz() callback to the governor and make the core additionally invoke it during system resume. 
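A hedged sketch of the pattern described above; the actual governor diff is outside include/, so everything here other than the governor_data field is illustrative:

  static void sketch_bang_bang_manage(struct thermal_zone_device *tz)
  {
          if (tz->governor_data)          /* trips already walked once */
                  return;

          /* ... walk the trips and set up the cooling instances ... */

          tz->governor_data = (void *)true;
  }

  /* the governor's .update_tz() is expected to clear governor_data when
   * notified of system resume, so the walk runs again afterwards
   */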
To avoid affecting the other users of that callback unnecessarily, add a special notification reason for system resume, THERMAL_TZ_RESUME, and also pass it to __thermal_zone_device_update() called during system resume for consistency. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Kästle Reviewed-by: Zhang Rui Cc: 6.10+ # 6.10+ Link: https://patch.msgid.link/2285575.iZASKD2KPV@rjwysocki.net --- include/linux/thermal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25fbf960b474..b86ddca46b9e 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -55,6 +55,7 @@ enum thermal_notify_event { THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */ THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ + THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ }; /** -- cgit v1.2.3 From bf5ffc8c80e0cf5205849cd0c9c3cb261d2beee6 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:18 -0600 Subject: perf: arm_pmu: Remove event index to counter remapping Xscale and Armv6 PMUs defined the cycle counter at 0 and event counters starting at 1 and had 1:1 event index to counter numbering. On Armv7 and later, this changed the cycle counter to 31 and event counters start at 0. The drivers for Armv7 and PMUv3 kept the old event index numbering and introduced an event index to counter conversion. The conversion uses masking to convert from event index to a counter number. This operation relies on having at most 32 counters so that the cycle counter index 0 can be transformed to counter number 31. Armv9.4 adds support for an additional fixed function counter (instructions) which increases possible counters to more than 32, and the conversion won't work anymore as a simple subtract and mask. The primary reason for the translation (other than history) seems to be to have a contiguous mask of counters 0-N. Keeping that would result in more complicated index to counter conversions. Instead, store a mask of available counters rather than just number of events. That provides more information in addition to the number of events. No (intended) functional changes. 
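A rough sketch of what the mask representation enables (illustrative code, not a hunk from this patch): driver code can walk the implemented counters directly with the bitmap iterators instead of translating event indices through a subtract-and-mask conversion. The helper name is hypothetical.

#include <linux/bitmap.h>
#include <linux/perf/arm_pmu.h>

/* Hypothetical helper: operate on every counter this PMU implements. */
static void example_for_each_counter(struct arm_pmu *cpu_pmu)
{
	int idx;

	for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
		/* program, reset or read hardware counter 'idx' here */
	}
}
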
Acked-by: Mark Rutland Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-1-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 2 +- include/linux/perf/arm_pmuv3.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index b3b34f6670cf..e5d6d204beab 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -96,7 +96,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); - int num_events; + DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS); bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 7867db04ec98..eccbdd8eb98f 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -6,6 +6,7 @@ #ifndef __PERF_ARM_PMUV3_H #define __PERF_ARM_PMUV3_H +#define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 #define ARMV8_PMU_MAX_COUNTERS 32 #define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) -- cgit v1.2.3 From a4a6e2078d85a9d94bcc7eab77845cb8cd39f680 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:19 -0600 Subject: perf: arm_pmuv3: Prepare for more than 32 counters Various PMUv3 registers which are a mask of counters are 64-bit registers, but the accessor functions take a u32. This has been fine as the upper 32-bits have been RES0 as there has been a maximum of 32 counters prior to Armv9.4/8.9. With Armv9.4/8.9, a 33rd counter is added. Update the accessor functions to use a u64 instead. Acked-by: Mark Rutland Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-2-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/kvm/arm_pmu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 35d4ca4f6122..334d7c5503cf 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -19,8 +19,8 @@ struct kvm_pmc { }; struct kvm_pmu_events { - u32 events_host; - u32 events_guest; + u64 events_host; + u64 events_guest; }; struct kvm_pmu { -- cgit v1.2.3 From f9b11aa00708d94a0cd78bfde34b68c0f95d8b50 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:21 -0600 Subject: KVM: arm64: pmu: Use generated define for PMSELR_EL0.SEL access ARMV8_PMU_COUNTER_MASK is really a mask for the PMSELR_EL0.SEL register field. Make that clear by adding a standard sysreg definition for the register, and using it instead. 
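Conceptually, the change trades a mask derived from the counter count for a named register field. The snippet below is purely illustrative; EXAMPLE_PMSELR_SEL is a made-up stand-in for the generated sysreg field definition, not a real kernel macro:

#include <linux/bitfield.h>
#include <linux/bits.h>

#define EXAMPLE_PMSELR_SEL	GENMASK(4, 0)	/* stand-in for the generated SEL field */

static inline u32 example_encode_pmselr(u32 counter)
{
	/* was, conceptually: counter & ARMV8_PMU_COUNTER_MASK */
	return FIELD_PREP(EXAMPLE_PMSELR_SEL, counter);
}
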
Reviewed-by: Mark Rutland Acked-by: Mark Rutland Reviewed-by: Marc Zyngier Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-4-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmuv3.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index eccbdd8eb98f..792b8e10b72a 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -8,7 +8,6 @@ #define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 #define ARMV8_PMU_MAX_COUNTERS 32 -#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) /* * Common architectural and microarchitectural event numbers. -- cgit v1.2.3 From 126d7d7cce5e048fb82477a9842d088d10ff0df6 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:22 -0600 Subject: arm64: perf/kvm: Use a common PMU cycle counter define The PMUv3 and KVM code each have a define for the PMU cycle counter index. Move KVM's define to a shared location and use it for PMUv3 driver. Reviewed-by: Marc Zyngier Acked-by: Mark Rutland Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-5-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/kvm/arm_pmu.h | 1 - include/linux/perf/arm_pmuv3.h | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 334d7c5503cf..871067fb2616 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -10,7 +10,6 @@ #include #include -#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM) struct kvm_pmc { diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 792b8e10b72a..f4ec76f725a3 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -9,6 +9,9 @@ #define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 #define ARMV8_PMU_MAX_COUNTERS 32 +#define ARMV8_PMU_CYCLE_IDX 31 + + /* * Common architectural and microarchitectural event numbers. */ -- cgit v1.2.3 From 2f62701fa5b0ee94c68d2fcfc470d08aef195441 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:23 -0600 Subject: KVM: arm64: Refine PMU defines for number of counters There are 2 defines for the number of PMU counters: ARMV8_PMU_MAX_COUNTERS and ARMPMU_MAX_HWEVENTS. Both are the same currently, but Armv9.4/8.9 increases the number of possible counters from 32 to 33. With this change, the maximum number of counters will differ for KVM's PMU emulation which is PMUv3.4. Give KVM PMU emulation its own define to decouple it from the rest of the kernel's number PMU counters. The VHE PMU code needs to match the PMU driver, so switch it to use ARMPMU_MAX_HWEVENTS instead. 
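A hedged build-time check, not part of the patch, that captures the relationship the split leaves implicit: the emulated PMU's fixed limit should not exceed what the host driver can track.

#include <linux/build_bug.h>
#include <linux/perf/arm_pmu.h>
#include <kvm/arm_pmu.h>

static_assert(KVM_ARMV8_PMU_MAX_COUNTERS <= ARMPMU_MAX_HWEVENTS,
	      "KVM's emulated PMUv3 cannot expose more counters than the host driver tracks");
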
Acked-by: Mark Rutland Reviewed-by: Marc Zyngier Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-6-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/kvm/arm_pmu.h | 3 ++- include/linux/perf/arm_pmuv3.h | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 871067fb2616..e08aeec5d936 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -10,6 +10,7 @@ #include #include +#define KVM_ARMV8_PMU_MAX_COUNTERS 32 #if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM) struct kvm_pmc { @@ -25,7 +26,7 @@ struct kvm_pmu_events { struct kvm_pmu { struct irq_work overflow_work; struct kvm_pmu_events events; - struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; + struct kvm_pmc pmc[KVM_ARMV8_PMU_MAX_COUNTERS]; int irq_num; bool created; bool irq_level; diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index f4ec76f725a3..4f7a7f2222e5 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -7,8 +7,6 @@ #define __PERF_ARM_PMUV3_H #define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 -#define ARMV8_PMU_MAX_COUNTERS 32 - #define ARMV8_PMU_CYCLE_IDX 31 -- cgit v1.2.3 From d8226d8cfbaf5eb9771af8ad8b4e58697e2ffb74 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:24 -0600 Subject: perf: arm_pmuv3: Add support for Armv9.4 PMU instruction counter Armv9.4/8.9 PMU adds optional support for a fixed instruction counter similar to the fixed cycle counter. Support for the feature is indicated in the ID_AA64DFR1_EL1 register PMICNTR field. The counter is not accessible in AArch32. Existing userspace using direct counter access won't know how to handle the fixed instruction counter, so we have to avoid using the counter when user access is requested. Acked-by: Mark Rutland Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-7-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 8 ++++++-- include/linux/perf/arm_pmuv3.h | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index e5d6d204beab..4b5b83677e3f 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -17,10 +17,14 @@ #ifdef CONFIG_ARM_PMU /* - * The ARMv7 CPU PMU supports up to 32 event counters. + * The Armv7 and Armv8.8 or less CPU PMU supports up to 32 event counters. + * The Armv8.9/9.4 CPU PMU supports up to 33 event counters. */ +#ifdef CONFIG_ARM #define ARMPMU_MAX_HWEVENTS 32 - +#else +#define ARMPMU_MAX_HWEVENTS 33 +#endif /* * ARM PMU hw_event flags */ diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 4f7a7f2222e5..3372c1b56486 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -8,7 +8,7 @@ #define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 #define ARMV8_PMU_CYCLE_IDX 31 - +#define ARMV8_PMU_INSTR_IDX 32 /* Not accessible from AArch32 */ /* * Common architectural and microarchitectural event numbers. 
@@ -228,8 +228,10 @@ */ #define ARMV8_PMU_OVSR_P GENMASK(30, 0) #define ARMV8_PMU_OVSR_C BIT(31) +#define ARMV8_PMU_OVSR_F BIT_ULL(32) /* arm64 only */ /* Mask for writable bits is both P and C fields */ -#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C) +#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C | \ + ARMV8_PMU_OVSR_F) /* * PMXEVTYPER: Event selection reg -- cgit v1.2.3 From 2140e63cd87fa707acf514d594725097bed018fd Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 12 Aug 2024 09:30:44 +0200 Subject: ethtool: Add new result codes for TDR diagnostics Add new result codes to support TDR diagnostics in preparation for Open Alliance 1000BaseT1 TDR support: - ETHTOOL_A_CABLE_RESULT_CODE_NOISE: TDR not possible due to high noise level. - ETHTOOL_A_CABLE_RESULT_CODE_RESOLUTION_NOT_POSSIBLE: TDR resolution not possible / out of distance. Reviewed-by: Andrew Lunn Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20240812073046.1728288-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 93c57525a975..9074fa309bd6 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -556,6 +556,10 @@ enum { * a regular 100 Ohm cable and a part with the abnormal impedance value */ ETHTOOL_A_CABLE_RESULT_CODE_IMPEDANCE_MISMATCH, + /* TDR not possible due to high noise level */ + ETHTOOL_A_CABLE_RESULT_CODE_NOISE, + /* TDR resolution not possible / out of distance */ + ETHTOOL_A_CABLE_RESULT_CODE_RESOLUTION_NOT_POSSIBLE, }; enum { -- cgit v1.2.3 From f40a455d01f80c6638be382d75cb1c4e7748d8af Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 13 Aug 2024 14:33:47 +0100 Subject: ipv6: Add ipv6_addr_{cpu_to_be32,be32_to_cpu} helpers Add helpers to convert an ipv6 addr, expressed as an array of words, from CPU to big-endian byte order, and vice versa. No functional change intended. Compile tested only. Suggested-by: Andrew Lunn Link: https://lore.kernel.org/netdev/c7684349-535c-45a4-9a74-d47479a50020@lunn.ch/ Reviewed-by: Andrew Lunn Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240813-ipv6_addr-helpers-v2-1-5c974f8cca3e@kernel.org Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 88a8e554f7a1..e7113855a10f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1365,4 +1365,16 @@ static inline void ip6_sock_set_recvpktinfo(struct sock *sk) release_sock(sk); } +#define IPV6_ADDR_WORDS 4 + +static inline void ipv6_addr_cpu_to_be32(__be32 *dst, const u32 *src) +{ + cpu_to_be32_array(dst, src, IPV6_ADDR_WORDS); +} + +static inline void ipv6_addr_be32_to_cpu(u32 *dst, const __be32 *src) +{ + be32_to_cpu_array(dst, src, IPV6_ADDR_WORDS); +} + #endif /* _NET_IPV6_H */ -- cgit v1.2.3 From 1fc2ac428ef7d2ab9e8e19efe7ec3e58aea51bf3 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 16 Aug 2024 12:15:23 -0600 Subject: io_uring: fix user_data field name in comment io_uring_cqe's user_data field refers to `sqe->data`, but io_uring_sqe does not have a data field. Fix the comment to say `sqe->user_data`. 
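A hedged userspace sketch (using liburing, error handling omitted) of the round-trip the corrected comment documents: whatever is placed in sqe->user_data comes back verbatim in cqe->user_data.

#include <liburing.h>

int example_user_data_roundtrip(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	io_uring_queue_init(8, &ring, 0);

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	sqe->user_data = 0x1234;	/* or io_uring_sqe_set_data64(sqe, 0x1234) */

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	/* cqe->user_data == 0x1234 here */
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}
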
Signed-off-by: Caleb Sander Mateos Link: https://github.com/axboe/liburing/pull/1206 Link: https://lore.kernel.org/r/20240816181526.3642732-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2aaf7ee256ac..adc2524fd8e3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -421,7 +421,7 @@ enum io_uring_msg_ring_flags { * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { - __u64 user_data; /* sqe->data submission passed back */ + __u64 user_data; /* sqe->user_data value passed back */ __s32 res; /* result code for this event */ __u32 flags; -- cgit v1.2.3 From f03e94f23b04c2b71c0044c1534921b3975ef10c Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Tue, 13 Aug 2024 13:34:10 +0800 Subject: scsi: core: Fix the return value of scsi_logical_block_count() scsi_logical_block_count() should return the block count of a given SCSI command. The original implementation ended up shifting twice, leading to an incorrect count being returned. Fix the conversion between bytes and logical blocks. Cc: stable@vger.kernel.org Fixes: 6a20e21ae1e2 ("scsi: core: Add helper to return number of logical blocks in a request") Signed-off-by: Chaotian Jing Link: https://lore.kernel.org/r/20240813053534.7720-1-chaotian.jing@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 45c40d200154..8ecfb94049db 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -234,7 +234,7 @@ static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd) static inline unsigned int scsi_logical_block_count(struct scsi_cmnd *scmd) { - unsigned int shift = ilog2(scmd->device->sector_size) - SECTOR_SHIFT; + unsigned int shift = ilog2(scmd->device->sector_size); return blk_rq_bytes(scsi_cmd_to_rq(scmd)) >> shift; } -- cgit v1.2.3 From cd06b713a6880997ca5aecac8e33d5f9c541749e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 16 Aug 2024 11:55:10 +0530 Subject: scsi: ufs: core: Add a quirk for handling broken LSDBS field in controller capabilities register 'Legacy Queue & Single Doorbell Support (LSDBS)' field in the controller capabilities register is supposed to report whether the legacy single doorbell mode is supported in the controller or not. But some controllers report '1' in this field which corresponds to 'LSDB not supported', but they indeed support LSDB. So let's add a quirk to handle those controllers. If the quirk is enabled by the controller driver, then LSDBS register field will be ignored and legacy single doorbell mode is assumed to be enabled always. Tested-by: Amit Pundir Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240816-ufs-bug-fix-v3-1-e6fe0e18e2a3@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index cac0cdb9a916..0fd2aebac728 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -676,6 +676,14 @@ enum ufshcd_quirks { * the standard best practice for managing keys). 
*/ UFSHCD_QUIRK_KEYS_IN_PRDT = 1 << 24, + + /* + * This quirk indicates that the controller reports the value 1 (not + * supported) in the Legacy Single DoorBell Support (LSDBS) bit of the + * Controller Capabilities register although it supports the legacy + * single doorbell mode. + */ + UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25, }; enum ufshcd_caps { -- cgit v1.2.3 From fca5cb4dd2b4a9423cb6d112cc71c33899955a1f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 10 Aug 2024 14:20:55 +0800 Subject: Revert "lib/mpi: Extend the MPI library" This partially reverts commit a8ea8bdd9df92a0e5db5b43900abb7a288b8a53e. Most of it is no longer needed since sm2 has been removed. However, the following functions have been kept as they have developed other uses: mpi_copy mpi_mod mpi_test_bit mpi_set_bit mpi_rshift mpi_add mpi_sub mpi_addm mpi_subm mpi_mul mpi_mulm mpi_tdiv_r mpi_fdiv_r Signed-off-by: Herbert Xu --- include/linux/mpi.h | 65 ----------------------------------------------------- 1 file changed, 65 deletions(-) (limited to 'include') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 89b720893e12..e081428b91ef 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -40,87 +40,33 @@ struct gcry_mpi { typedef struct gcry_mpi *MPI; #define mpi_get_nlimbs(a) ((a)->nlimbs) -#define mpi_has_sign(a) ((a)->sign) /*-- mpiutil.c --*/ MPI mpi_alloc(unsigned nlimbs); -void mpi_clear(MPI a); void mpi_free(MPI a); int mpi_resize(MPI a, unsigned nlimbs); -static inline MPI mpi_new(unsigned int nbits) -{ - return mpi_alloc((nbits + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB); -} - MPI mpi_copy(MPI a); -MPI mpi_alloc_like(MPI a); -void mpi_snatch(MPI w, MPI u); -MPI mpi_set(MPI w, MPI u); -MPI mpi_set_ui(MPI w, unsigned long u); -MPI mpi_alloc_set_ui(unsigned long u); -void mpi_swap_cond(MPI a, MPI b, unsigned long swap); - -/* Constants used to return constant MPIs. See mpi_init if you - * want to add more constants. - */ -#define MPI_NUMBER_OF_CONSTANTS 6 -enum gcry_mpi_constants { - MPI_C_ZERO, - MPI_C_ONE, - MPI_C_TWO, - MPI_C_THREE, - MPI_C_FOUR, - MPI_C_EIGHT -}; - -MPI mpi_const(enum gcry_mpi_constants no); /*-- mpicoder.c --*/ - -/* Different formats of external big integer representation. */ -enum gcry_mpi_format { - GCRYMPI_FMT_NONE = 0, - GCRYMPI_FMT_STD = 1, /* Twos complement stored without length. */ - GCRYMPI_FMT_PGP = 2, /* As used by OpenPGP (unsigned only). */ - GCRYMPI_FMT_SSH = 3, /* As used by SSH (like STD but with length). */ - GCRYMPI_FMT_HEX = 4, /* Hex format. */ - GCRYMPI_FMT_USG = 5, /* Like STD but unsigned. */ - GCRYMPI_FMT_OPAQUE = 8 /* Opaque format (some functions only). */ -}; - MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes); MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); -int mpi_fromstr(MPI val, const char *str); -MPI mpi_scanval(const char *string); MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len); void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign); int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, int *sign); -int mpi_print(enum gcry_mpi_format format, unsigned char *buffer, - size_t buflen, size_t *nwritten, MPI a); /*-- mpi-mod.c --*/ void mpi_mod(MPI rem, MPI dividend, MPI divisor); -/* Context used with Barrett reduction. 
*/ -struct barrett_ctx_s; -typedef struct barrett_ctx_s *mpi_barrett_t; - -mpi_barrett_t mpi_barrett_init(MPI m, int copy); -void mpi_barrett_free(mpi_barrett_t ctx); -void mpi_mod_barrett(MPI r, MPI x, mpi_barrett_t ctx); -void mpi_mul_barrett(MPI w, MPI u, MPI v, mpi_barrett_t ctx); - /*-- mpi-pow.c --*/ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); /*-- mpi-cmp.c --*/ int mpi_cmp_ui(MPI u, ulong v); int mpi_cmp(MPI u, MPI v); -int mpi_cmpabs(MPI u, MPI v); /*-- mpi-sub-ui.c --*/ int mpi_sub_ui(MPI w, MPI u, unsigned long vval); @@ -130,16 +76,9 @@ void mpi_normalize(MPI a); unsigned mpi_get_nbits(MPI a); int mpi_test_bit(MPI a, unsigned int n); void mpi_set_bit(MPI a, unsigned int n); -void mpi_set_highbit(MPI a, unsigned int n); -void mpi_clear_highbit(MPI a, unsigned int n); -void mpi_clear_bit(MPI a, unsigned int n); -void mpi_rshift_limbs(MPI a, unsigned int count); void mpi_rshift(MPI x, MPI a, unsigned int n); -void mpi_lshift_limbs(MPI a, unsigned int count); -void mpi_lshift(MPI x, MPI a, unsigned int n); /*-- mpi-add.c --*/ -void mpi_add_ui(MPI w, MPI u, unsigned long v); void mpi_add(MPI w, MPI u, MPI v); void mpi_sub(MPI w, MPI u, MPI v); void mpi_addm(MPI w, MPI u, MPI v, MPI m); @@ -152,10 +91,6 @@ void mpi_mulm(MPI w, MPI u, MPI v, MPI m); /*-- mpi-div.c --*/ void mpi_tdiv_r(MPI rem, MPI num, MPI den); void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); -void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor); - -/*-- mpi-inv.c --*/ -int mpi_invm(MPI x, MPI a, MPI n); /* inline functions */ -- cgit v1.2.3 From 8e3a67f2de87ee94ac11ea69beb4edc2993b17a0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 10 Aug 2024 14:20:57 +0800 Subject: crypto: lib/mpi - Add error checks to extension The remaining functions added by commit a8ea8bdd9df92a0e5db5b43900abb7a288b8a53e did not check for memory allocation errors. Add the checks and change the API to allow errors to be returned. 
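A hedged sketch of the new calling convention: the arithmetic helpers now return int, so an allocation failure inside them can be propagated instead of being silently ignored.

#include <linux/mpi.h>

static int example_modular_add(MPI res, MPI a, MPI b, MPI m)
{
	int ret;

	ret = mpi_addm(res, a, b, m);	/* returned void before this change */
	if (ret)
		return ret;		/* e.g. -ENOMEM from an internal resize */

	return 0;
}
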
Fixes: a8ea8bdd9df9 ("lib/mpi: Extend the MPI library") Signed-off-by: Herbert Xu --- include/linux/mpi.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index e081428b91ef..47be46f36435 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -59,7 +59,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, int *sign); /*-- mpi-mod.c --*/ -void mpi_mod(MPI rem, MPI dividend, MPI divisor); +int mpi_mod(MPI rem, MPI dividend, MPI divisor); /*-- mpi-pow.c --*/ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); @@ -75,22 +75,22 @@ int mpi_sub_ui(MPI w, MPI u, unsigned long vval); void mpi_normalize(MPI a); unsigned mpi_get_nbits(MPI a); int mpi_test_bit(MPI a, unsigned int n); -void mpi_set_bit(MPI a, unsigned int n); -void mpi_rshift(MPI x, MPI a, unsigned int n); +int mpi_set_bit(MPI a, unsigned int n); +int mpi_rshift(MPI x, MPI a, unsigned int n); /*-- mpi-add.c --*/ -void mpi_add(MPI w, MPI u, MPI v); -void mpi_sub(MPI w, MPI u, MPI v); -void mpi_addm(MPI w, MPI u, MPI v, MPI m); -void mpi_subm(MPI w, MPI u, MPI v, MPI m); +int mpi_add(MPI w, MPI u, MPI v); +int mpi_sub(MPI w, MPI u, MPI v); +int mpi_addm(MPI w, MPI u, MPI v, MPI m); +int mpi_subm(MPI w, MPI u, MPI v, MPI m); /*-- mpi-mul.c --*/ -void mpi_mul(MPI w, MPI u, MPI v); -void mpi_mulm(MPI w, MPI u, MPI v, MPI m); +int mpi_mul(MPI w, MPI u, MPI v); +int mpi_mulm(MPI w, MPI u, MPI v, MPI m); /*-- mpi-div.c --*/ -void mpi_tdiv_r(MPI rem, MPI num, MPI den); -void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); +int mpi_tdiv_r(MPI rem, MPI num, MPI den); +int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); /* inline functions */ -- cgit v1.2.3 From abc158c82ae555078aa5dd2d8407c3df0f868904 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 May 2024 10:55:59 +0200 Subject: sched: Prepare generic code for delayed dequeue While most of the delayed dequeue code can be done inside the sched_class itself, there is one location where we do not have an appropriate hook, namely ttwu_runnable(). Add an ENQUEUE_DELAYED call to the on_rq path to deal with waking delayed dequeue tasks. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105029.200000445@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c1b4ee3234f..f4a648e739e1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -544,6 +544,7 @@ struct sched_entity { struct list_head group_node; unsigned int on_rq; + unsigned int sched_delayed; u64 exec_start; u64 sum_exec_runtime; -- cgit v1.2.3 From a1c446611e31ca5363d4db51e398271da1dce0af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Jul 2024 21:30:09 +0200 Subject: sched,freezer: Mark TASK_FROZEN special The special task states are those that do not suffer spurious wakeups, TASK_FROZEN is very much one of those, mark it as such. 
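For illustration only (not the freezer's actual code path), states covered by is_special_task_state() are expected to be entered through set_special_state() rather than set_current_state(); marking TASK_FROZEN special makes that pattern the documented one for it.

static void example_enter_frozen(void)
{
	/* Special states are set under pi_lock via set_special_state(), so a
	 * concurrent wakeup cannot observe a half-done state transition. */
	set_special_state(TASK_FROZEN);
	schedule();
}
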
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105029.998329901@infradead.org --- include/linux/sched.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f4a648e739e1..8a3a389bd623 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -149,8 +149,9 @@ struct user_event_mm; * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). */ -#define is_special_task_state(state) \ - ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) +#define is_special_task_state(state) \ + ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \ + TASK_DEAD | TASK_FROZEN)) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP # define debug_normal_state_change(state_value) \ -- cgit v1.2.3 From 82e9d0456e06cebe2c89f3c73cdbc9e3805e9437 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 31 May 2024 15:49:40 +0200 Subject: sched/fair: Avoid re-setting virtual deadline on 'migrations' During OSPM24 Youssef noted that migrations are re-setting the virtual deadline. Notably everything that does a dequeue-enqueue, like setting nice, changing preferred numa-node, and a myriad of other random crap, will cause this to happen. This shouldn't be. Preserve the relative virtual deadline across such dequeue/enqueue cycles. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105030.625119246@infradead.org --- include/linux/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a3a389bd623..d25e1cfd5766 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -544,8 +544,10 @@ struct sched_entity { u64 min_vruntime; struct list_head group_node; - unsigned int on_rq; - unsigned int sched_delayed; + unsigned char on_rq; + unsigned char sched_delayed; + unsigned char rel_deadline; + /* hole */ u64 exec_start; u64 sum_exec_runtime; -- cgit v1.2.3 From 857b158dc5e81c6de795ef6be006eed146098fc6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 May 2023 13:46:30 +0200 Subject: sched/eevdf: Use sched_attr::sched_runtime to set request/slice suggestion Allow applications to directly set a suggested request/slice length using sched_attr::sched_runtime. The implementation clamps the value to: 0.1[ms] <= slice <= 100[ms] which is 1/10 the size of HZ=1000 and 10 times the size of HZ=100. Applications should strive to use their periodic runtime at a high confidence interval (95%+) as the target slice. Using a smaller slice will introduce undue preemptions, while using a larger value will increase latency. For all the following examples assume a scheduling quantum of 8, and for consistency all examples have W=4: {A,B,C,D}(w=1,r=8): ABCD... +---+---+---+--- t=0, V=1.5 t=1, V=3.5 A |------< A |------< B |------< B |------< C |------< C |------< D |------< D |------< ---+*------+-------+--- ---+--*----+-------+--- t=2, V=5.5 t=3, V=7.5 A |------< A |------< B |------< B |------< C |------< C |------< D |------< D |------< ---+----*--+-------+--- ---+------*+-------+--- Note: 4 identical tasks in FIFO order ~~~ {A,B}(w=1,r=16) C(w=2,r=16) AACCBBCC... 
+---+---+---+--- t=0, V=1.25 t=2, V=5.25 A |--------------< A |--------------< B |--------------< B |--------------< C |------< C |------< ---+*------+-------+--- ---+----*--+-------+--- t=4, V=8.25 t=6, V=12.25 A |--------------< A |--------------< B |--------------< B |--------------< C |------< C |------< ---+-------*-------+--- ---+-------+---*---+--- Note: 1 heavy task -- because q=8, double r such that the deadline of the w=2 task doesn't go below q. Note: observe the full schedule becomes: W*max(r_i/w_i) = 4*2q = 8q in length. Note: the period of the heavy task is half the full period at: W*(r_i/w_i) = 4*(2q/2) = 4q ~~~ {A,C,D}(w=1,r=16) B(w=1,r=8): BAACCBDD... +---+---+---+--- t=0, V=1.5 t=1, V=3.5 A |--------------< A |---------------< B |------< B |------< C |--------------< C |--------------< D |--------------< D |--------------< ---+*------+-------+--- ---+--*----+-------+--- t=3, V=7.5 t=5, V=11.5 A |---------------< A |---------------< B |------< B |------< C |--------------< C |--------------< D |--------------< D |--------------< ---+------*+-------+--- ---+-------+--*----+--- t=6, V=13.5 A |---------------< B |------< C |--------------< D |--------------< ---+-------+----*--+--- Note: 1 short task -- again double r so that the deadline of the short task won't be below q. Made B short because its not the leftmost task, but is eligible with the 0,1,2,3 spread. Note: like with the heavy task, the period of the short task observes: W*(r_i/w_i) = 4*(1q/1) = 4q ~~~ A(w=1,r=16) B(w=1,r=8) C(w=2,r=16) BCCAABCC... +---+---+---+--- t=0, V=1.25 t=1, V=3.25 A |--------------< A |--------------< B |------< B |------< C |------< C |------< ---+*------+-------+--- ---+--*----+-------+--- t=3, V=7.25 t=5, V=11.25 A |--------------< A |--------------< B |------< B |------< C |------< C |------< ---+------*+-------+--- ---+-------+--*----+--- t=6, V=13.25 A |--------------< B |------< C |------< ---+-------+----*--+--- Note: 1 heavy and 1 short task -- combine them all. Note: both the short and heavy task end up with a period of 4q ~~~ A(w=1,r=16) B(w=2,r=16) C(w=1,r=8) BBCAABBC... 
+---+---+---+--- t=0, V=1 t=2, V=5 A |--------------< A |--------------< B |------< B |------< C |------< C |------< ---+*------+-------+--- ---+----*--+-------+--- t=3, V=7 t=5, V=11 A |--------------< A |--------------< B |------< B |------< C |------< C |------< ---+------*+-------+--- ---+-------+--*----+--- t=7, V=15 A |--------------< B |------< C |------< ---+-------+------*+--- Note: as before but permuted ~~~ From all this it can be deduced that, for the steady state: - the total period (P) of a schedule is: W*max(r_i/w_i) - the average period of a task is: W*(r_i/w_i) - each task obtains the fair share: w_i/W of each full period P Signed-off-by: Peter Zijlstra (Intel) Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105030.842834421@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index d25e1cfd5766..89a3d8d94e96 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -547,6 +547,7 @@ struct sched_entity { unsigned char on_rq; unsigned char sched_delayed; unsigned char rel_deadline; + unsigned char custom_slice; /* hole */ u64 exec_start; -- cgit v1.2.3 From aef6987d89544d63a47753cf3741cabff0b5574c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Jun 2024 13:16:49 +0200 Subject: sched/eevdf: Propagate min_slice up the cgroup hierarchy In the absence of an explicit cgroup slice configureation, make mixed slice length work with cgroups by propagating the min_slice up the hierarchy. This ensures the cgroup entity gets timely service to service its entities that have this timing constraint set on them. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105030.948188417@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 89a3d8d94e96..3709dedbab59 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -542,6 +542,7 @@ struct sched_entity { struct rb_node run_node; u64 deadline; u64 min_vruntime; + u64 min_slice; struct list_head group_node; unsigned char on_rq; -- cgit v1.2.3 From b0b228bb8d546edd23e2467ed7669d4ce62ae763 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Sat, 17 Aug 2024 17:33:34 +0800 Subject: ALSA: seq: Remove unused declarations These functions are never implemented and used. 
Signed-off-by: Yue Haibing Link: https://patch.msgid.link/20240817093334.1120002-1-yuehaibing@huawei.com Signed-off-by: Takashi Iwai --- include/sound/seq_kernel.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/sound/seq_kernel.h b/include/sound/seq_kernel.h index c8621671fa70..00c32eed2124 100644 --- a/include/sound/seq_kernel.h +++ b/include/sound/seq_kernel.h @@ -86,10 +86,6 @@ static inline size_t snd_seq_event_packet_size(struct snd_seq_event *ev) /* interface for OSS emulation */ int snd_seq_set_queue_tempo(int client, struct snd_seq_queue_tempo *tempo); -/* port callback routines */ -void snd_port_init_callback(struct snd_seq_port_callback *p); -struct snd_seq_port_callback *snd_port_alloc_callback(void); - /* port attach/detach */ int snd_seq_event_port_attach(int client, struct snd_seq_port_callback *pcbp, int cap, int type, int midi_channels, int midi_voices, char *portname); -- cgit v1.2.3 From 37745918e0e7575bc40f38da93a99b9fa6406224 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 13 Aug 2024 13:07:00 +0100 Subject: ALSA: timer: Introduce virtual userspace-driven timers Implement two ioctl calls in order to support virtual userspace-driven ALSA timers. The first ioctl is SNDRV_TIMER_IOCTL_CREATE, which gets the snd_timer_uinfo struct as a parameter and puts a file descriptor of a virtual timer into the `fd` field of the snd_timer_unfo structure. It also updates the `id` field of the snd_timer_uinfo struct, which provides a unique identifier for the timer (basically, the subdevice number which can be used when creating timer instances). This patch also introduces a tiny id allocator for the userspace-driven timers, which guarantees that we don't have more than 128 of them in the system. Another ioctl is SNDRV_TIMER_IOCTL_TRIGGER, which allows us to trigger the virtual timer (and calls snd_timer_interrupt for the timer under the hood), causing all of the timer instances binded to this timer to execute their callbacks. The maximum amount of ticks available for the timer is 1 for the sake of simplicity of the userspace API. 'start', 'stop', 'open' and 'close' callbacks for the userspace-driven timers are empty since we don't really do any hardware initialization here. Suggested-by: Axel Holzinger Signed-off-by: Ivan Orlov Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240813120701.171743-4-ivan.orlov0322@gmail.com --- include/uapi/sound/asound.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 8bf7e8a0eb6f..4cd513215bcd 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -869,7 +869,7 @@ struct snd_ump_block_info { * Timer section - /dev/snd/timer */ -#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7) +#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8) enum { SNDRV_TIMER_CLASS_NONE = -1, @@ -894,6 +894,7 @@ enum { #define SNDRV_TIMER_GLOBAL_RTC 1 /* unused */ #define SNDRV_TIMER_GLOBAL_HPET 2 #define SNDRV_TIMER_GLOBAL_HRTIMER 3 +#define SNDRV_TIMER_GLOBAL_UDRIVEN 4 /* info flags */ #define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */ @@ -974,6 +975,18 @@ struct snd_timer_status { }; #endif +/* + * This structure describes the userspace-driven timer. Such timers are purely virtual, + * and can only be triggered from software (for instance, by userspace application). 
+ */ +struct snd_timer_uinfo { + /* To pretend being a normal timer, we need to know the resolution in ns. */ + __u64 resolution; + int fd; + unsigned int id; + unsigned char reserved[16]; +}; + #define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int) #define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id) #define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int) @@ -990,6 +1003,8 @@ struct snd_timer_status { #define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2) #define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3) #define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int) +#define SNDRV_TIMER_IOCTL_CREATE _IOWR('T', 0xa5, struct snd_timer_uinfo) +#define SNDRV_TIMER_IOCTL_TRIGGER _IO('T', 0xa6) #if __BITS_PER_LONG == 64 #define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD -- cgit v1.2.3 From fda1dd3c54ef3c4200c2e77634a91610da973950 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 18 Jul 2024 17:50:37 -0700 Subject: find: Switch from inline to __always_inline 'inline' keyword is only a recommendation for compiler. If it decides to not inline find_bit nodemask functions, the whole small_const_nbits() machinery doesn't work. This is how a standard GCC 11.3.0 does for my x86_64 build now. This patch replaces 'inline' directive with unconditional '__always_inline' to make sure that there's always a chance for compile-time optimization. It doesn't change size of kernel image, according to bloat-o-meter. [[ Brian: split out from: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But rewritten, as there were too many conflicts. ]] Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/find.h | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/find.h b/include/linux/find.h index 5dfca4225fef..68685714bc18 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -52,7 +52,7 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -81,7 +81,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -112,7 +112,7 @@ unsigned long find_next_and_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -142,7 +142,7 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. 
*/ -static inline +static __always_inline unsigned long find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -171,7 +171,7 @@ unsigned long find_next_or_bit(const unsigned long *addr1, * Returns the bit number of the next zero bit * If no bits are zero, returns @size. */ -static inline +static __always_inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -198,7 +198,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, * Returns the bit number of the first set bit. * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -224,7 +224,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) * Returns the bit number of the N'th set bit. * If no such, returns >= @size. */ -static inline +static __always_inline unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) { if (n >= size) @@ -249,7 +249,7 @@ unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsign * Returns the bit number of the N'th set bit. * If no such, returns @size. */ -static inline +static __always_inline unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { @@ -276,7 +276,7 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long * * Returns the bit number of the N'th set bit. * If no such, returns @size. */ -static inline +static __always_inline unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { @@ -332,7 +332,7 @@ unsigned long find_nth_and_andnot_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size) @@ -357,7 +357,7 @@ unsigned long find_first_and_bit(const unsigned long *addr1, * Returns the bit number for the first set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, @@ -381,7 +381,7 @@ unsigned long find_first_and_and_bit(const unsigned long *addr1, * Returns the bit number of the first cleared bit. * If no bits are zero, returns @size. */ -static inline +static __always_inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -402,7 +402,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) * * Returns the bit number of the last set bit, or size. */ -static inline +static __always_inline unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -425,7 +425,7 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size) * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. 
*/ -static inline +static __always_inline unsigned long find_next_and_bit_wrap(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -448,7 +448,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_bit_wrap(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -465,7 +465,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr, * Helper for for_each_set_bit_wrap(). Make sure you're doing right thing * before using it alone. */ -static inline +static __always_inline unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size, unsigned long start, unsigned long n) { @@ -506,20 +506,20 @@ extern unsigned long find_next_clump8(unsigned long *clump, #if defined(__LITTLE_ENDIAN) -static inline unsigned long find_next_zero_bit_le(const void *addr, - unsigned long size, unsigned long offset) +static __always_inline +unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_zero_bit(addr, size, offset); } -static inline unsigned long find_next_bit_le(const void *addr, - unsigned long size, unsigned long offset) +static __always_inline +unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_bit(addr, size, offset); } -static inline unsigned long find_first_zero_bit_le(const void *addr, - unsigned long size) +static __always_inline +unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { return find_first_zero_bit(addr, size); } @@ -527,7 +527,7 @@ static inline unsigned long find_first_zero_bit_le(const void *addr, #elif defined(__BIG_ENDIAN) #ifndef find_next_zero_bit_le -static inline +static __always_inline unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { @@ -546,7 +546,7 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned #endif #ifndef find_first_zero_bit_le -static inline +static __always_inline unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -560,7 +560,7 @@ unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) #endif #ifndef find_next_bit_le -static inline +static __always_inline unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { -- cgit v1.2.3 From ed8cd2b3bd9f070120528fc5403a2d0b5afe07f8 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 18 Jul 2024 17:50:38 -0700 Subject: bitmap: Switch from inline to __always_inline 'inline' keyword is only a recommendation for compiler. If it decides to not inline bitmap functions, the whole small_const_nbits() machinery doesn't work. This is how a standard GCC 11.3.0 does for my x86_64 build now. This patch replaces 'inline' directive with unconditional '__always_inline' to make sure that there's always a chance for compile-time optimization. It doesn't change size of kernel image, according to bloat-o-meter. [[ Brian: split out from: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But rewritten, as there were too many conflicts. 
]] Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/bitmap.h | 140 +++++++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 8c4768c44a01..0406f48f0a6a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -203,12 +203,12 @@ unsigned long bitmap_find_next_zero_area_off(unsigned long *map, * the bit offset of all zero areas this function finds is multiples of that * power of 2. A @align_mask of 0 means no alignment is required. */ -static inline unsigned long -bitmap_find_next_zero_area(unsigned long *map, - unsigned long size, - unsigned long start, - unsigned int nr, - unsigned long align_mask) +static __always_inline +unsigned long bitmap_find_next_zero_area(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask) { return bitmap_find_next_zero_area_off(map, size, start, nr, align_mask, 0); @@ -228,7 +228,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, #define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) -static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) +static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -238,7 +238,7 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) memset(dst, 0, len); } -static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) +static __always_inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -248,8 +248,8 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) memset(dst, 0xff, len); } -static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, - unsigned int nbits) +static __always_inline +void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -262,8 +262,8 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, /* * Copy bitmap and clear tail bits in last word. 
*/ -static inline void bitmap_copy_clear_tail(unsigned long *dst, - const unsigned long *src, unsigned int nbits) +static __always_inline +void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits) { bitmap_copy(dst, src, nbits); if (nbits % BITS_PER_LONG) @@ -306,16 +306,18 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits); bitmap_copy_clear_tail((unsigned long *)(buf), (const unsigned long *)(bitmap), (nbits)) #endif -static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_and(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } -static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; @@ -323,8 +325,9 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } -static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +void bitmap_xor(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; @@ -332,16 +335,17 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, __bitmap_xor(dst, src1, src2, nbits); } -static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } -static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, - unsigned int nbits) +static __always_inline +void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src); @@ -356,8 +360,8 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) -static inline bool bitmap_equal(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); @@ -376,10 +380,9 @@ static inline bool bitmap_equal(const unsigned long *src1, * * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise */ -static inline bool bitmap_or_equal(const unsigned long *src1, - const unsigned long *src2, - const unsigned long *src3, - unsigned int nbits) +static __always_inline +bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, + const unsigned long *src3, unsigned int nbits) { if (!small_const_nbits(nbits)) return __bitmap_or_equal(src1, src2, src3, nbits); @@ -387,9 +390,8 @@ static inline bool 
bitmap_or_equal(const unsigned long *src1, return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits)); } -static inline bool bitmap_intersects(const unsigned long *src1, - const unsigned long *src2, - unsigned int nbits) +static __always_inline +bool bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; @@ -397,8 +399,8 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } -static inline bool bitmap_subset(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); @@ -406,7 +408,8 @@ static inline bool bitmap_subset(const unsigned long *src1, return __bitmap_subset(src1, src2, nbits); } -static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) +static __always_inline +bool bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); @@ -414,7 +417,8 @@ static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) return find_first_bit(src, nbits) == nbits; } -static inline bool bitmap_full(const unsigned long *src, unsigned int nbits) +static __always_inline +bool bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); @@ -448,8 +452,8 @@ unsigned long bitmap_weight_andnot(const unsigned long *src1, return __bitmap_weight_andnot(src1, src2, nbits); } -static __always_inline void bitmap_set(unsigned long *map, unsigned int start, - unsigned int nbits) +static __always_inline +void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); @@ -464,8 +468,8 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start, __bitmap_set(map, start, nbits); } -static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, - unsigned int nbits) +static __always_inline +void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); @@ -480,8 +484,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, __bitmap_clear(map, start, nbits); } -static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits) +static __always_inline +void bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; @@ -489,8 +494,9 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s __bitmap_shift_right(dst, src, shift, nbits); } -static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits) +static __always_inline +void bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); @@ -498,11 +504,12 @@ static inline void bitmap_shift_left(unsigned long *dst, const 
unsigned long *sr __bitmap_shift_left(dst, src, shift, nbits); } -static inline void bitmap_replace(unsigned long *dst, - const unsigned long *old, - const unsigned long *new, - const unsigned long *mask, - unsigned int nbits) +static __always_inline +void bitmap_replace(unsigned long *dst, + const unsigned long *old, + const unsigned long *new, + const unsigned long *mask, + unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*old & ~(*mask)) | (*new & *mask); @@ -545,8 +552,9 @@ static inline void bitmap_replace(unsigned long *dst, * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. * See bitmap_scatter() for details related to this relationship. */ -static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, - const unsigned long *mask, unsigned int nbits) +static __always_inline +void bitmap_scatter(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; @@ -599,8 +607,9 @@ static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, * bitmap_scatter(res, src, mask, n) and a call to * bitmap_scatter(res, result, mask, n) will lead to the same res value. */ -static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, - const unsigned long *mask, unsigned int nbits) +static __always_inline +void bitmap_gather(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; @@ -611,9 +620,9 @@ static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, __assign_bit(n++, dst, test_bit(bit, src)); } -static inline void bitmap_next_set_region(unsigned long *bitmap, - unsigned int *rs, unsigned int *re, - unsigned int end) +static __always_inline +void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, + unsigned int *re, unsigned int end) { *rs = find_next_bit(bitmap, end, *rs); *re = find_next_zero_bit(bitmap, end, *rs + 1); @@ -628,7 +637,8 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, * This is the complement to __bitmap_find_free_region() and releases * the found region (by clearing it in the bitmap). */ -static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) +static __always_inline +void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) { bitmap_clear(bitmap, pos, BIT(order)); } @@ -644,7 +654,8 @@ static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos * Returns: 0 on success, or %-EBUSY if specified region wasn't * free (not all bits were zero). */ -static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) +static __always_inline +int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) { unsigned int len = BIT(order); @@ -668,7 +679,8 @@ static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos * Returns: the bit offset in bitmap of the allocated region, * or -errno on failure. 
*/ -static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) +static __always_inline +int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) { unsigned int pos, end; /* scans bitmap by regions of size order */ @@ -722,7 +734,7 @@ static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bi * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, * but we expect the lower 32-bits of u64. */ -static inline void bitmap_from_u64(unsigned long *dst, u64 mask) +static __always_inline void bitmap_from_u64(unsigned long *dst, u64 mask) { bitmap_from_arr64(dst, &mask, 64); } @@ -737,9 +749,8 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) * @map memory region. For @nbits = 0 and @nbits > BITS_PER_LONG the return * value is undefined. */ -static inline unsigned long bitmap_read(const unsigned long *map, - unsigned long start, - unsigned long nbits) +static __always_inline +unsigned long bitmap_read(const unsigned long *map, unsigned long start, unsigned long nbits) { size_t index = BIT_WORD(start); unsigned long offset = start % BITS_PER_LONG; @@ -772,8 +783,9 @@ static inline unsigned long bitmap_read(const unsigned long *map, * * For @nbits == 0 and @nbits > BITS_PER_LONG no writes are performed. */ -static inline void bitmap_write(unsigned long *map, unsigned long value, - unsigned long start, unsigned long nbits) +static __always_inline +void bitmap_write(unsigned long *map, unsigned long value, + unsigned long start, unsigned long nbits) { size_t index; unsigned long offset; -- cgit v1.2.3 From ab6b1010dab68f6d4bf063517db4ce2d63554bc6 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 18 Jul 2024 17:50:39 -0700 Subject: cpumask: Switch from inline to __always_inline On recent (v6.6+) builds with Clang (based on Clang 18.0.0) and certain configurations [0], I'm finding that (lack of) inlining decisions may lead to section mismatch warnings like the following: WARNING: modpost: vmlinux.o: section mismatch in reference: cpumask_andnot (section: .text) -> cpuhp_bringup_cpus_parallel.tmp_mask (section: .init.data) ERROR: modpost: Section mismatches detected. or more confusingly: WARNING: modpost: vmlinux: section mismatch in reference: cpumask_andnot+0x5f (section: .text) -> efi_systab_phys (section: .init.data) The first warning makes a little sense, because cpuhp_bringup_cpus_parallel() (an __init function) calls cpumask_andnot() on tmp_mask (an __initdata symbol). If the compiler doesn't inline cpumask_andnot(), this may appear like a mismatch. The second warning makes less sense, but might be because efi_systab_phys and cpuhp_bringup_cpus_parallel.tmp_mask are laid out near each other, and the latter isn't a proper C symbol definition. In any case, it seems a reasonable solution to suggest more strongly to the compiler that these cpumask macros *must* be inlined, as 'inline' is just a recommendation. This change has been previously proposed in the past as: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But the change has been split up, to separately justify the cpumask changes (which drive my work) and the bitmap/const optimizations (that Yury separately proposed for other reasons). This ends up as somewhere between a "rebase" and "rewrite" -- I had to rewrite most of the patch. 
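As an aside, a minimal illustration of the pattern described above (not taken from this patch; the names below are hypothetical stand-ins for cpuhp_bringup_cpus_parallel() and its tmp_mask):

#include <linux/cpumask.h>
#include <linux/init.h>

static cpumask_t tmp_mask __initdata;		/* ends up in .init.data */

static int __init bringup_example(void)		/* ends up in .init.text */
{
	/*
	 * If the compiler emits a non-inlined (possibly specialized) copy of
	 * cpumask_andnot() into .text with &tmp_mask folded into it, modpost
	 * sees a .text -> .init.data reference and reports a section mismatch.
	 * Forcing __always_inline keeps the helper inside the __init caller.
	 */
	cpumask_andnot(&tmp_mask, cpu_possible_mask, cpu_online_mask);
	return 0;
}
early_initcall(bringup_example);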
According to bloat-o-meter, vmlinux decreases minimally in size (-0.00% to -0.01%, depending on the version of GCC or Clang and .config in question) with this series of changes: gcc 13.2.0, x86_64_defconfig -3005 bytes, Before=21944501, After=21941496, chg -0.01% clang 16.0.6, x86_64_defconfig -105 bytes, Before=22571692, After=22571587, chg -0.00% gcc 9.5.0, x86_64_defconfig -1771 bytes, Before=21557598, After=21555827, chg -0.01% clang 18.0_pre516547 (ChromiumOS toolchain), x86_64_defconfig -191 bytes, Before=22615339, After=22615148, chg -0.00% clang 18.0_pre516547 (ChromiumOS toolchain), based on ChromiumOS config + gcov -979 bytes, Before=76294783, After=76293804, chg -0.00% [0] CONFIG_HOTPLUG_PARALLEL=y ('select'ed for x86 as of [1]) and CONFIG_GCOV_PROFILE_ALL. [1] commit 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it") Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/cpumask.h | 212 +++++++++++++++++++++++++----------------------- 1 file changed, 112 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 801a7e524113..9e3f3c96607d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -30,7 +30,7 @@ extern unsigned int nr_cpu_ids; #endif -static inline void set_nr_cpu_ids(unsigned int nr) +static __always_inline void set_nr_cpu_ids(unsigned int nr) { #if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) WARN_ON(nr != nr_cpu_ids); @@ -149,7 +149,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) * * Return: >= nr_cpu_ids if no cpus set. */ -static inline unsigned int cpumask_first(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_first(const struct cpumask *srcp) { return find_first_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -160,7 +160,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if all cpus are set. */ -static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -172,7 +172,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ -static inline +static __always_inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); @@ -186,7 +186,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask * * Return: >= nr_cpu_ids if no cpus set in all. */ -static inline +static __always_inline unsigned int cpumask_first_and_and(const struct cpumask *srcp1, const struct cpumask *srcp2, const struct cpumask *srcp3) @@ -201,7 +201,7 @@ unsigned int cpumask_first_and_and(const struct cpumask *srcp1, * * Return: >= nr_cpumask_bits if no CPUs set. */ -static inline unsigned int cpumask_last(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_last(const struct cpumask *srcp) { return find_last_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -213,7 +213,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no further cpus set. 
*/ -static inline +static __always_inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ @@ -229,7 +229,8 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no further cpus unset. */ -static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ if (n != -1) @@ -239,18 +240,21 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) #if NR_CPUS == 1 /* Uniprocessor: there is only one valid CPU */ -static inline unsigned int cpumask_local_spread(unsigned int i, int node) +static __always_inline +unsigned int cpumask_local_spread(unsigned int i, int node) { return 0; } -static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) { return cpumask_first_and(src1p, src2p); } -static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_any_distribute(const struct cpumask *srcp) { return cpumask_first(srcp); } @@ -269,9 +273,9 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp); * * Return: >= nr_cpu_ids if no further cpus set in both. */ -static inline +static __always_inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, - const struct cpumask *src2p) + const struct cpumask *src2p) { /* -1 is a legal arg here. */ if (n != -1) @@ -291,7 +295,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) #if NR_CPUS == 1 -static inline +static __always_inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) { cpumask_check(start); @@ -394,7 +398,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * Often used to find any cpu but smp_processor_id() in a mask. * Return: >= nr_cpu_ids if no cpus set. */ -static inline +static __always_inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { unsigned int i; @@ -414,7 +418,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) * * Returns >= nr_cpu_ids if no cpus set. */ -static inline +static __always_inline unsigned int cpumask_any_and_but(const struct cpumask *mask1, const struct cpumask *mask2, unsigned int cpu) @@ -436,7 +440,8 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1, * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu)); } @@ -449,7 +454,7 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline +static __always_inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -465,7 +470,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, * * Return: >= nr_cpu_ids if such cpu doesn't exist. 
*/ -static inline +static __always_inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -508,12 +513,14 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline +void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline +void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -557,7 +564,8 @@ static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, * * Return: true if @cpu is set in @cpumask, else returns false */ -static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) +static __always_inline +bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } @@ -571,7 +579,8 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum * * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ -static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +static __always_inline +bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -585,7 +594,8 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp * * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ -static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) +static __always_inline +bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -594,7 +604,7 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ -static inline void cpumask_setall(struct cpumask *dstp) +static __always_inline void cpumask_setall(struct cpumask *dstp) { if (small_const_nbits(small_cpumask_bits)) { cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits); @@ -607,7 +617,7 @@ static inline void cpumask_setall(struct cpumask *dstp) * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ -static inline void cpumask_clear(struct cpumask *dstp) +static __always_inline void cpumask_clear(struct cpumask *dstp) { bitmap_zero(cpumask_bits(dstp), large_cpumask_bits); } @@ -620,9 +630,9 @@ static inline void cpumask_clear(struct cpumask *dstp) * * Return: false if *@dstp is empty, else returns true */ -static inline bool cpumask_and(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -634,8 +644,9 @@ static inline bool cpumask_and(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, - const struct cpumask *src2p) +static 
__always_inline +void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -647,9 +658,9 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_xor(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +void cpumask_xor(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -663,9 +674,9 @@ static inline void cpumask_xor(struct cpumask *dstp, * * Return: false if *@dstp is empty, else returns true */ -static inline bool cpumask_andnot(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -678,8 +689,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp, * * Return: true if the cpumasks are equal, false if not */ -static inline bool cpumask_equal(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -694,9 +705,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p, * Return: true if first cpumask ORed with second cpumask == third cpumask, * otherwise false */ -static inline bool cpumask_or_equal(const struct cpumask *src1p, - const struct cpumask *src2p, - const struct cpumask *src3p) +static __always_inline +bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, + const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), cpumask_bits(src3p), small_cpumask_bits); @@ -710,8 +721,8 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, * Return: true if first cpumask ANDed with second cpumask is non-empty, * otherwise false */ -static inline bool cpumask_intersects(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -724,8 +735,8 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * * Return: true if *@src1p is a subset of *@src2p, else returns false */ -static inline bool cpumask_subset(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -737,7 +748,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, * * Return: true if srcp is empty (has no bits set), else false */ -static inline bool cpumask_empty(const struct cpumask *srcp) +static __always_inline bool cpumask_empty(const struct cpumask *srcp) { return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits); } @@ -748,7 +759,7 @@ static inline bool cpumask_empty(const struct cpumask *srcp) * * Return: true if srcp is full (has all bits 
set), else false */ -static inline bool cpumask_full(const struct cpumask *srcp) +static __always_inline bool cpumask_full(const struct cpumask *srcp) { return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); } @@ -759,7 +770,7 @@ static inline bool cpumask_full(const struct cpumask *srcp) * * Return: count of bits set in *srcp */ -static inline unsigned int cpumask_weight(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_weight(const struct cpumask *srcp) { return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits); } @@ -771,8 +782,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) * * Return: count of bits set in both *srcp1 and *srcp2 */ -static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, - const struct cpumask *srcp2) +static __always_inline +unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } @@ -784,8 +795,9 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, * * Return: count of bits set in both *srcp1 and *srcp2 */ -static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, - const struct cpumask *srcp2) +static __always_inline +unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, + const struct cpumask *srcp2) { return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } @@ -796,8 +808,8 @@ static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, * @srcp: the input to shift * @n: the number of bits to shift by */ -static inline void cpumask_shift_right(struct cpumask *dstp, - const struct cpumask *srcp, int n) +static __always_inline +void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, small_cpumask_bits); @@ -809,8 +821,8 @@ static inline void cpumask_shift_right(struct cpumask *dstp, * @srcp: the input to shift * @n: the number of bits to shift by */ -static inline void cpumask_shift_left(struct cpumask *dstp, - const struct cpumask *srcp, int n) +static __always_inline +void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, nr_cpumask_bits); @@ -821,8 +833,8 @@ static inline void cpumask_shift_left(struct cpumask *dstp, * @dstp: the result * @srcp: the input cpumask */ -static inline void cpumask_copy(struct cpumask *dstp, - const struct cpumask *srcp) +static __always_inline +void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits); } @@ -858,8 +870,8 @@ static inline void cpumask_copy(struct cpumask *dstp, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parse_user(const char __user *buf, int len, - struct cpumask *dstp) +static __always_inline +int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } @@ -872,8 +884,8 @@ static inline int cpumask_parse_user(const char __user *buf, int len, * * Return: -errno, or 0 for success. 
*/ -static inline int cpumask_parselist_user(const char __user *buf, int len, - struct cpumask *dstp) +static __always_inline +int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); @@ -886,7 +898,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parse(const char *buf, struct cpumask *dstp) +static __always_inline int cpumask_parse(const char *buf, struct cpumask *dstp) { return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits); } @@ -898,7 +910,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) * * Return: -errno, or 0 for success. */ -static inline int cpulist_parse(const char *buf, struct cpumask *dstp) +static __always_inline int cpulist_parse(const char *buf, struct cpumask *dstp) { return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } @@ -908,7 +920,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) * * Return: size to allocate for a &struct cpumask in bytes */ -static inline unsigned int cpumask_size(void) +static __always_inline unsigned int cpumask_size(void) { return bitmap_size(large_cpumask_bits); } @@ -920,7 +932,7 @@ static inline unsigned int cpumask_size(void) bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); -static inline +static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); @@ -938,13 +950,13 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) * * Return: %true if allocation succeeded, %false if not */ -static inline +static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE); } -static inline +static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var(mask, flags | __GFP_ZERO); @@ -954,7 +966,7 @@ void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); -static inline bool cpumask_available(cpumask_var_t mask) +static __always_inline bool cpumask_available(cpumask_var_t mask) { return mask != NULL; } @@ -964,43 +976,43 @@ static inline bool cpumask_available(cpumask_var_t mask) #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) #define __cpumask_var_read_mostly -static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; } -static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, +static __always_inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return true; } -static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { cpumask_clear(*mask); return true; } -static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, +static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { cpumask_clear(*mask); return true; } -static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) +static __always_inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } -static inline void free_cpumask_var(cpumask_var_t mask) +static 
__always_inline void free_cpumask_var(cpumask_var_t mask) { } -static inline void free_bootmem_cpumask_var(cpumask_var_t mask) +static __always_inline void free_bootmem_cpumask_var(cpumask_var_t mask) { } -static inline bool cpumask_available(cpumask_var_t mask) +static __always_inline bool cpumask_available(cpumask_var_t mask) { return true; } @@ -1058,7 +1070,7 @@ void set_cpu_online(unsigned int cpu, bool online); ((struct cpumask *)(1 ? (bitmap) \ : (void *)sizeof(__check_is_bitmap(bitmap)))) -static inline int __check_is_bitmap(const unsigned long *bitmap) +static __always_inline int __check_is_bitmap(const unsigned long *bitmap) { return 1; } @@ -1073,7 +1085,7 @@ static inline int __check_is_bitmap(const unsigned long *bitmap) extern const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; -static inline const struct cpumask *get_cpu_mask(unsigned int cpu) +static __always_inline const struct cpumask *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; @@ -1100,32 +1112,32 @@ static __always_inline unsigned int num_online_cpus(void) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) -static inline bool cpu_online(unsigned int cpu) +static __always_inline bool cpu_online(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_online_mask); } -static inline bool cpu_enabled(unsigned int cpu) +static __always_inline bool cpu_enabled(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_enabled_mask); } -static inline bool cpu_possible(unsigned int cpu) +static __always_inline bool cpu_possible(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_possible_mask); } -static inline bool cpu_present(unsigned int cpu) +static __always_inline bool cpu_present(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_present_mask); } -static inline bool cpu_active(unsigned int cpu) +static __always_inline bool cpu_active(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_active_mask); } -static inline bool cpu_dying(unsigned int cpu) +static __always_inline bool cpu_dying(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_dying_mask); } @@ -1138,32 +1150,32 @@ static inline bool cpu_dying(unsigned int cpu) #define num_present_cpus() 1U #define num_active_cpus() 1U -static inline bool cpu_online(unsigned int cpu) +static __always_inline bool cpu_online(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_possible(unsigned int cpu) +static __always_inline bool cpu_possible(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_enabled(unsigned int cpu) +static __always_inline bool cpu_enabled(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_present(unsigned int cpu) +static __always_inline bool cpu_present(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_active(unsigned int cpu) +static __always_inline bool cpu_active(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_dying(unsigned int cpu) +static __always_inline bool cpu_dying(unsigned int cpu) { return false; } @@ -1197,7 +1209,7 @@ static inline bool cpu_dying(unsigned int cpu) * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. 
*/ -static inline ssize_t +static __always_inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), @@ -1220,9 +1232,9 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ -static inline ssize_t -cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, - loff_t off, size_t count) +static __always_inline +ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, + loff_t off, size_t count) { return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; @@ -1242,9 +1254,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ -static inline ssize_t -cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, - loff_t off, size_t count) +static __always_inline +ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, + loff_t off, size_t count) { return bitmap_print_list_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; -- cgit v1.2.3 From 54c9e0085bd1015e524d8f4d3c4e78a7bc77ffca Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 18 Jul 2024 17:50:40 -0700 Subject: nodemask: Switch from inline to __always_inline 'inline' keyword is only a recommendation for compiler. If it decides to not inline nodemask functions, the whole small_const_nbits() machinery doesn't work. This is how a standard GCC 11.3.0 does for my x86_64 build now. This patch replaces 'inline' directive with unconditional '__always_inline' to make sure that there's always a chance for compile-time optimization. It doesn't change size of kernel image, according to bloat-o-meter. [[ Brian: split out from: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But rewritten, as there were too many conflicts. ]] Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/nodemask.h | 86 ++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index b61438313a73..9fd7a0ce9c1a 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -107,11 +107,11 @@ extern nodemask_t _unused_nodemask_arg_; */ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) -static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) +static __always_inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } -static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) +static __always_inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? 
m->bits : NULL; } @@ -132,19 +132,19 @@ static __always_inline void __node_set(int node, volatile nodemask_t *dstp) } #define node_clear(node, dst) __node_clear((node), &(dst)) -static inline void __node_clear(int node, volatile nodemask_t *dstp) +static __always_inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) -static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) +static __always_inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) -static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) +static __always_inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } @@ -154,14 +154,14 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) -static inline bool __node_test_and_set(int node, nodemask_t *addr) +static __always_inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -169,7 +169,7 @@ static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -177,7 +177,7 @@ static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -185,7 +185,7 @@ static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -193,7 +193,7 @@ static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) -static inline void __nodes_complement(nodemask_t *dstp, +static __always_inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); @@ -201,7 +201,7 @@ static inline void __nodes_complement(nodemask_t *dstp, #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_equal(const nodemask_t *src1p, 
+static __always_inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); @@ -209,7 +209,7 @@ static inline bool __nodes_equal(const nodemask_t *src1p, #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_intersects(const nodemask_t *src1p, +static __always_inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); @@ -217,33 +217,33 @@ static inline bool __nodes_intersects(const nodemask_t *src1p, #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_subset(const nodemask_t *src1p, +static __always_inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) -static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) +static __always_inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) -static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) +static __always_inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) -static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) +static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } #define nodes_shift_right(dst, src, n) \ __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES) -static inline void __nodes_shift_right(nodemask_t *dstp, +static __always_inline void __nodes_shift_right(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); @@ -251,7 +251,7 @@ static inline void __nodes_shift_right(nodemask_t *dstp, #define nodes_shift_left(dst, src, n) \ __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES) -static inline void __nodes_shift_left(nodemask_t *dstp, +static __always_inline void __nodes_shift_left(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); @@ -261,13 +261,13 @@ static inline void __nodes_shift_left(nodemask_t *dstp, > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) -static inline unsigned int __first_node(const nodemask_t *srcp) +static __always_inline unsigned int __first_node(const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) -static inline unsigned int __next_node(int n, const nodemask_t *srcp) +static __always_inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } @@ -277,7 +277,7 @@ static inline unsigned int __next_node(int n, const nodemask_t *srcp) * the first node in src if needed. Returns MAX_NUMNODES if src is empty. 
*/ #define next_node_in(n, src) __next_node_in((n), &(src)) -static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) +static __always_inline unsigned int __next_node_in(int node, const nodemask_t *srcp) { unsigned int ret = __next_node(node, srcp); @@ -286,7 +286,7 @@ static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) return ret; } -static inline void init_nodemask_of_node(nodemask_t *mask, int node) +static __always_inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); @@ -304,7 +304,7 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node) }) #define first_unset_node(mask) __first_unset_node(&(mask)) -static inline unsigned int __first_unset_node(const nodemask_t *maskp) +static __always_inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); @@ -338,21 +338,21 @@ static inline unsigned int __first_unset_node(const nodemask_t *maskp) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) -static inline int __nodemask_parse_user(const char __user *buf, int len, +static __always_inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) -static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) +static __always_inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) -static inline int __node_remap(int oldbit, +static __always_inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); @@ -360,7 +360,7 @@ static inline int __node_remap(int oldbit, #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) -static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, +static __always_inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); @@ -368,7 +368,7 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) -static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, +static __always_inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); @@ -376,7 +376,7 @@ static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) -static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, +static __always_inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); @@ -418,22 +418,22 @@ enum node_states { extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 -static inline int node_state(int node, enum node_states state) +static __always_inline int 
node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } -static inline void node_set_state(int node, enum node_states state) +static __always_inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } -static inline void node_clear_state(int node, enum node_states state) +static __always_inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } -static inline int num_node_state(enum node_states state) +static __always_inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } @@ -443,11 +443,11 @@ static inline int num_node_state(enum node_states state) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) -static inline unsigned int next_online_node(int nid) +static __always_inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } -static inline unsigned int next_memory_node(int nid) +static __always_inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } @@ -455,13 +455,13 @@ static inline unsigned int next_memory_node(int nid) extern unsigned int nr_node_ids; extern unsigned int nr_online_nodes; -static inline void node_set_online(int nid) +static __always_inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } -static inline void node_set_offline(int nid) +static __always_inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); @@ -469,20 +469,20 @@ static inline void node_set_offline(int nid) #else -static inline int node_state(int node, enum node_states state) +static __always_inline int node_state(int node, enum node_states state) { return node == 0; } -static inline void node_set_state(int node, enum node_states state) +static __always_inline void node_set_state(int node, enum node_states state) { } -static inline void node_clear_state(int node, enum node_states state) +static __always_inline void node_clear_state(int node, enum node_states state) { } -static inline int num_node_state(enum node_states state) +static __always_inline int num_node_state(enum node_states state) { return 1; } @@ -502,7 +502,7 @@ static inline int num_node_state(enum node_states state) #endif -static inline int node_random(const nodemask_t *maskp) +static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) int w, bit; -- cgit v1.2.3 From 92a10d3861491d09e73b35db7113dd049659f588 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 30 Jul 2024 22:15:16 +0200 Subject: runtime constants: move list of constants to vmlinux.lds.h Refactor the list of constant variables into a macro. This should make it easier to add more constants in the future. 
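As an illustration of the intended use (a sketch, not the literal arch-side diffs in this series): an architecture's vmlinux.lds.S that previously listed each runtime constant by hand can instead reference the single shared macro added in the hunk below.

	/* before: every runtime constant spelled out in each arch linker script */
	RUNTIME_CONST(shift, d_hash_shift)
	RUNTIME_CONST(ptr, dentry_hashtable)

	/* after: one shared list, maintained in asm-generic/vmlinux.lds.h */
	RUNTIME_CONST_VARIABLES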
Signed-off-by: Jann Horn Reviewed-by: Alexander Gordeev Reviewed-by: Thomas Gleixner Acked-by: Arnd Bergmann Acked-by: Will Deacon Signed-off-by: Arnd Bergmann --- include/asm-generic/vmlinux.lds.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 1ae44793132a..2157d8802351 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -918,6 +918,10 @@ #define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) +#define RUNTIME_CONST_VARIABLES \ + RUNTIME_CONST(shift, d_hash_shift) \ + RUNTIME_CONST(ptr, dentry_hashtable) + /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ . = ALIGN(8); \ -- cgit v1.2.3 From b27404b2bbf951a11500525dea15681cc970226c Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 19 Aug 2024 08:55:46 +0800 Subject: ALSA/ASoC/SoundWire: Intel: use single definition for SDW_INTEL_MAX_LINKS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The definitions are currently duplicated in intel-sdw-acpi.c and sof_sdw.c. Move the definition to the sdw_intel.h header, and change the prefix to make it Intel-specific. No functionality change in this patch. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Reviewed-by: Takashi Iwai Link: https://patch.msgid.link/20240819005548.5867-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index d537587b4499..87d82ea9a13a 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -447,4 +447,9 @@ extern const struct sdw_intel_hw_ops sdw_intel_lnl_hw_ops; #define SDW_INTEL_DEV_NUM_IDA_MIN 6 +/* + * Max number of links supported in hardware + */ +#define SDW_INTEL_MAX_LINKS 4 + #endif -- cgit v1.2.3 From d2234596be2192d9c1ae34f8c7534531191bc433 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 19 Aug 2024 08:55:47 +0800 Subject: soundwire: intel: add probe-time check on link id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In older platforms, the number of links was constant and hard-coded to 4. Newer platforms can have varying number of links, so we need to add a probe-time check to make sure the ACPI-reported information with _DSD properties is aligned with hardware capabilities reported in the SoundWire LCAP register. Acked-by: Vinod Koul Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Acked-by: Mark Brown Reviewed-by: Takashi Iwai Link: https://patch.msgid.link/20240819005548.5867-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 87d82ea9a13a..cb8e7396b4db 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -388,6 +388,7 @@ struct sdw_intel; /* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms. 
* @debugfs_init: initialize all debugfs capabilities * @debugfs_exit: close and cleanup debugfs capabilities + * @get_link_count: fetch link count from hardware registers * @register_dai: read all PDI information and register DAIs * @check_clock_stop: throw error message if clock is not stopped. * @start_bus: normal start @@ -412,6 +413,8 @@ struct sdw_intel_hw_ops { void (*debugfs_init)(struct sdw_intel *sdw); void (*debugfs_exit)(struct sdw_intel *sdw); + int (*get_link_count)(struct sdw_intel *sdw); + int (*register_dai)(struct sdw_intel *sdw); void (*check_clock_stop)(struct sdw_intel *sdw); -- cgit v1.2.3 From 1f3662838a05f1ab6af89a417f6f252d91d0806b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 19 Aug 2024 08:55:48 +0800 Subject: soundwire: intel: increase maximum number of links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel platforms have enabled 4 links since the beginning, newer platforms now have 5 links. Update the definition accordingly. This patch will have no effect on older platforms where the number of links was hard-coded. A follow-up patch will add a dynamic check that the ACPI-reported information is aligned with hardware capabilities on newer platforms. Acked-by: Vinod Koul Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Acked-by: Mark Brown Reviewed-by: Takashi Iwai Link: https://patch.msgid.link/20240819005548.5867-4-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index cb8e7396b4db..37ae69365fe2 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -453,6 +453,6 @@ extern const struct sdw_intel_hw_ops sdw_intel_lnl_hw_ops; /* * Max number of links supported in hardware */ -#define SDW_INTEL_MAX_LINKS 4 +#define SDW_INTEL_MAX_LINKS 5 #endif -- cgit v1.2.3 From 820a185896b77814557302b981b092a9e7b36814 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 24 Jul 2024 15:15:35 +0200 Subject: fcntl: add F_CREATED_QUERY Systemd has a helper called openat_report_new() that returns whether a file was created anew or it already existed before for cases where O_CREAT has to be used without O_EXCL (cf. [1]). That apparently isn't something that's specific to systemd but it's where I noticed it. The current logic is that it first attempts to open the file without O_CREAT | O_EXCL and if it gets ENOENT the helper tries again with both flags. If that succeeds all is well. If it now reports EEXIST it retries. That works fairly well but some corner cases make this more involved. If this operates on a dangling symlink the first openat() without O_CREAT | O_EXCL will return ENOENT but the second openat() with O_CREAT | O_EXCL will fail with EEXIST. The reason is that openat() without O_CREAT | O_EXCL follows the symlink while O_CREAT | O_EXCL doesn't for security reasons. So it's not something we can really change unless we add an explicit opt-in via O_FOLLOW which seems really ugly. The caller could try and use fanotify() to register to listen for creation events in the directory before calling openat(). The caller could then compare the returned tid to its own tid to ensure that even in threaded environments it actually created the file. 
That might work but is a lot of work for something that should be fairly
simple and I'm uncertain about its reliability.

The caller could use a bpf lsm hook to hook into security_file_open() to
figure out whether they created the file. That also seems a bit wild.

So let's add F_CREATED_QUERY which allows the caller to check whether they
actually did create the file. That has caveats of course but I don't think
they are problematic:

* In multi-threaded environments a thread can only be sure that it did
  create the file if it calls openat() with O_CREAT. In other words, it's
  obviously not enough to just go through its fdtable and check these fds
  because another thread could've created the file.

* If there are any codepaths where an openat() with O_CREAT would yield the
  same struct file as that of another thread it would obviously cause wrong
  results. I'm not aware of any such codepaths from openat() itself. Imho,
  that would be a bug.

* Related to the previous point, calling the new fcntl() on files created
  and opened via special-purpose system calls or ioctl()s would cause wrong
  results only if the affected subsystem a) raises FMODE_CREATED and b) may
  return the same struct file for two different calls. I'm not seeing
  anything outside of regular VFS code that raises FMODE_CREATED. There is
  code for b) in e.g., the drm layer where the same struct file is
  resurfaced but again FMODE_CREATED isn't used and it would be very
  misleading if it did.

Link: https://github.com/systemd/systemd/blob/11d5e2b5fbf9f6bfa5763fd45b56829ad4f0777f/src/basic/fs-util.c#L1078 [1]
Link: https://lore.kernel.org/r/20240724-work-fcntl-v1-1-e8153a2f1991@kernel.org
Reviewed-by: Jeff Layton
Reviewed-by: Jan Kara
Signed-off-by: Christian Brauner
---
 include/uapi/linux/fcntl.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index c0bcc185fa48..e55a3314bcb0 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -16,6 +16,9 @@
 
 #define F_DUPFD_QUERY	(F_LINUX_SPECIFIC_BASE + 3)
 
+/* Was the file just created? */
+#define F_CREATED_QUERY	(F_LINUX_SPECIFIC_BASE + 4)
+
 /*
  * Cancel a blocking posix lock; internal use only until we expose an
  * asynchronous lock api to userspace:
-- cgit v1.2.3

From c393eaa85349941badf3ce5f087400dbaf3bbe02 Mon Sep 17 00:00:00 2001
From: Haifeng Xu
Date: Fri, 26 Jul 2024 11:05:25 +0800
Subject: fs: don't flush in-flight wb switches for superblocks without cgroup writeback

When deactivating any type of superblock, it had to wait for the
in-flight wb switches to be completed. wb switches are executed in
inode_switch_wbs_work_fn() which needs to acquire the wb_switch_rwsem
and races against sync_inodes_sb(). If there is too much dirty data in
the superblock, the waiting time may increase significantly.

Superblocks without cgroup writeback, such as tmpfs, have nothing to do
with the wb switches, so the flushing can be avoided for them.
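A minimal sketch of the idea on the implementation side (fs/fs-writeback.c); the exact check in the real patch may differ, but SB_I_CGROUPWB is the existing per-superblock opt-in flag for cgroup writeback:

#include <linux/fs.h>
#include <linux/writeback.h>

void cgroup_writeback_umount(struct super_block *sb)
{
	/*
	 * Superblocks that never opt in to cgroup writeback (e.g. tmpfs)
	 * have no inode_switch_wbs work in flight, so there is nothing
	 * to wait for here.
	 */
	if (!(sb->s_iflags & SB_I_CGROUPWB))
		return;

	/* ... existing flush of in-flight wb switches ... */
}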
Signed-off-by: Haifeng Xu Link: https://lore.kernel.org/r/20240726030525.180330-1-haifeng.xu@shopee.com Reviewed-by: Jan Kara Suggested-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/writeback.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 1a54676d843a..56b85841ae4c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -217,7 +217,7 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes); int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done); -void cgroup_writeback_umount(void); +void cgroup_writeback_umount(struct super_block *sb); bool cleanup_offline_cgwb(struct bdi_writeback *wb); /** @@ -324,7 +324,7 @@ static inline void wbc_account_cgroup_owner(struct writeback_control *wbc, { } -static inline void cgroup_writeback_umount(void) +static inline void cgroup_writeback_umount(struct super_block *sb) { } -- cgit v1.2.3 From c01a5d89e5c8abe638107be2a4ea9e4c7fcdd7f6 Mon Sep 17 00:00:00 2001 From: Wang Long Date: Fri, 2 Aug 2024 17:19:01 +0800 Subject: percpu-rwsem: remove the unused parameter 'read' In the function percpu_rwsem_release, the parameter `read` is unused, so remove it. Signed-off-by: Wang Long Link: https://lore.kernel.org/r/20240802091901.2546797-1-w@laoqinren.net Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- include/linux/percpu-rwsem.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fb0426f349fc..c0286d479c9d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1683,7 +1683,7 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level) #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) #define __sb_writers_release(sb, lev) \ - percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) + percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_) /** * __sb_write_started - check if sb freeze level is held diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 36b942b67b7d..c012df33a9f0 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -145,7 +145,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); #define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, - bool read, unsigned long ip) + unsigned long ip) { lock_release(&sem->dep_map, ip); } -- cgit v1.2.3 From cd8e468efb4fb2742e06328a75b282c35c1abf8d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Aug 2024 21:01:57 +0200 Subject: ACPI: video: Add Dell UART backlight controller detection Dell All In One (AIO) models released after 2017 use a backlight controller board connected to an UART. In DSDT this uart port will be defined as: Name (_HID, "DELL0501") Name (_CID, EisaId ("PNP0501") Commit 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") has added support for this, but I neglected to tie this into acpi_video_get_backlight_type(). Now the first AIO has turned up which has not only the DSDT bits for this, but also an actual controller attached to the UART, yet it is not using this controller for backlight control. Add support to acpi_video_get_backlight_type() for a new dell_uart backlight type. 
So that the existing infra to override the backlight control method on the commandline or with DMI quirks can be used. Fixes: 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") Cc: All applicable Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240814190159.15650-2-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki --- include/acpi/video.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/video.h b/include/acpi/video.h index 3d538d4178ab..044c463138df 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -50,6 +50,7 @@ enum acpi_backlight_type { acpi_backlight_native, acpi_backlight_nvidia_wmi_ec, acpi_backlight_apple_gmux, + acpi_backlight_dell_uart, }; #if IS_ENABLED(CONFIG_ACPI_VIDEO) -- cgit v1.2.3 From ad614a706b1ac83b95b333f44b8f5e70bcb37dc5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 29 Jul 2024 11:26:34 +0200 Subject: drm/xe/oa/uapi: Make bit masks unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc-5: In function ‘decode_oa_format.isra.26’, inlined from ‘xe_oa_set_prop_oa_format’ at drivers/gpu/drm/xe/xe_oa.c:1664:6: ././include/linux/compiler_types.h:510:38: error: call to ‘__compiletime_assert_1336’ declared with attribute error: FIELD_GET: mask is not constant [...] ./include/linux/bitfield.h:155:3: note: in expansion of macro ‘__BF_FIELD_CHECK’ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ ^ drivers/gpu/drm/xe/xe_oa.c:1573:18: note: in expansion of macro ‘FIELD_GET’ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); ^ Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240729092634.2227611-1-geert+renesas@glider.be Signed-off-by: Lucas De Marchi (cherry picked from commit f2881dfdaaa9ec873dbd383ef5512fc31e576cbb) Signed-off-by: Rodrigo Vivi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19619d4952a8..db232a25189e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1590,10 +1590,10 @@ enum drm_xe_oa_property_id { * b. Counter select c. Counter size and d. BC report. Also refer to the * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. */ -#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) -#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) /** * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit -- cgit v1.2.3 From 81475beb1b5996505a39cd1d9316ce1e668932a2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 15 Aug 2024 16:32:28 +0000 Subject: block: Drop NULL check in bdev_write_zeroes_sectors() Function bdev_get_queue() must not return NULL, so drop the check in bdev_write_zeroes_sectors(). Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Reviewed-by: Martin K. 
Petersen Reviewed-by: Nitesh Shetty Link: https://lore.kernel.org/r/20240815163228.216051-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e85ec73a07d5..b7664d593486 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1296,12 +1296,7 @@ bdev_max_secure_erase_sectors(struct block_device *bdev) static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return q->limits.max_write_zeroes_sectors; - - return 0; + return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors; } static inline bool bdev_nonrot(struct block_device *bdev) -- cgit v1.2.3 From decbfaf06db05fa1f9b33149ebb3c145b44e878f Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 16 Aug 2024 15:51:53 +0200 Subject: drm/ttm: Add a flag to allow drivers to skip clear-on-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before releasing backing stores if they want to skip clear-on-free. Cc: Matthew Auld Cc: Thomas Hellström Suggested-by: Christian König Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240816135154.19678-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- include/drm/ttm/ttm_tt.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 2b9d856ff388..cfaf49de2419 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,6 +85,9 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * + * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles + * clearing backing store + * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -94,8 +97,9 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) +#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; -- cgit v1.2.3 From dc0112e6d8b42b39f9d283bab489a757e9d284f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Aug 2024 11:47:59 -0400 Subject: rpcrdma: Trace connection registration and unregistration These new trace points record xarray indices and the time of endpoint registration and unregistration, to co-ordinate with device removal events. 
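To make the intended use concrete, a hypothetical call site could pair the new trace points with the xarray bookkeeping they describe. Everything below except trace_rpcrdma_client_register() and the rn->rn_index field is made up for illustration (the function name, the xarray, and the assumption that rn_index is the u32 slot index):

/* Hypothetical sketch: emit the trace point once the notification has
 * been stored at rn->rn_index; a matching
 * trace_rpcrdma_client_unregister() call would go just before the
 * entry is erased again. */
static int rpcrdma_client_add(struct xarray *clients,
			      struct ib_device *device,
			      struct rpcrdma_notification *rn)
{
	int ret;

	ret = xa_alloc(clients, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL);
	if (ret)
		return ret;

	trace_rpcrdma_client_register(device, rn);
	return 0;
}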
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ba2d6a0e41cc..a96a985c49b3 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2277,6 +2277,42 @@ DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done); +DECLARE_EVENT_CLASS(rpcrdma_client_register_class, + TP_PROTO( + const struct ib_device *device, + const struct rpcrdma_notification *rn + ), + + TP_ARGS(device, rn), + + TP_STRUCT__entry( + __string(name, device->name) + __field(void *, callback) + __field(u32, index) + ), + + TP_fast_assign( + __assign_str(name); + __entry->callback = rn->rn_done; + __entry->index = rn->rn_index; + ), + + TP_printk("device=%s index=%u done callback=%pS\n", + __get_str(name), __entry->index, __entry->callback + ) +); + +#define DEFINE_CLIENT_REGISTER_EVENT(name) \ + DEFINE_EVENT(rpcrdma_client_register_class, name, \ + TP_PROTO( \ + const struct ib_device *device, \ + const struct rpcrdma_notification *rn \ + ), \ + TP_ARGS(device, rn)) + +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_register); +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_unregister); + #endif /* _TRACE_RPCRDMA_H */ #include -- cgit v1.2.3 From c1aa38866b9c58dc6cf7a5fc6a3e1ca75565169e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Jul 2024 10:58:29 +0200 Subject: netfilter: nf_tables: store new sets in dedicated list nft_set_lookup_byid() is very slow when transaction becomes large, due to walk of the transaction list. Add a dedicated list that contains only the new sets. Before: nft -f ruleset 0.07s user 0.00s system 0% cpu 1:04.84 total After: nft -f ruleset 0.07s user 0.00s system 0% cpu 30.115 total .. where ruleset contains ~10 sets with ~100k elements. The above number is for a combined flush+reload of the ruleset. With previous flush, even the first NEWELEM has to walk through a few hundred thousands of DELSET(ELEM) transactions before the first NEWSET object. To cope with random-order-newset-newsetelem we'd need to replace commit_set_list with a hashtable. Expectation is that a NEWELEM operation refers to the most recently added set, so last entry of the dedicated list should be the set we want. NB: This is not a bug fix per se (functionality is fine), but with larger transaction batches list search takes forever, so it would be nice to speed this up for -stable too, hence adding a "fixes" tag. Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets") Reported-by: Nadia Pinaeva Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1bfdd16890fa..2be4738eae1c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1674,6 +1674,7 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_trans_binding nft_trans_binding; + struct list_head list_trans_newset; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1875,6 +1876,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) 
{ re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head commit_set_list; struct list_head binding_list; struct list_head module_list; struct list_head notify_list; -- cgit v1.2.3 From d5283b47e225e1473e1a07085b9c4e6bfd08ba51 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 17 Jul 2024 22:09:44 -0400 Subject: netfilter: move nf_ct_netns_get out of nf_conncount_init This patch is to move nf_ct_netns_get() out of nf_conncount_init() and let the consumers of nf_conncount decide if they want to turn on netfilter conntrack. It makes nf_conncount more flexible to be used in other places and avoids netfilter conntrack turned on when using it in openvswitch conntrack. Signed-off-by: Xin Long Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index e227d997fc71..1b58b5b91ff6 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -15,10 +15,8 @@ struct nf_conncount_list { unsigned int count; /* length of list */ }; -struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, - unsigned int keylen); -void nf_conncount_destroy(struct net *net, unsigned int family, - struct nf_conncount_data *data); +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen); +void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data); unsigned int nf_conncount_count(struct net *net, struct nf_conncount_data *data, -- cgit v1.2.3 From 2865baf54077aa98fcdb478cefe6a42c417b9374 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 Apr 2024 20:04:58 -0700 Subject: x86: support user address masking instead of non-speculative conditional The Spectre-v1 mitigations made "access_ok()" much more expensive, since it has to serialize execution with the test for a valid user address. All the normal user copy routines avoid this by just masking the user address with a data-dependent mask instead, but the fast "unsafe_user_read()" kind of patterms that were supposed to be a fast case got slowed down. This introduces a notion of using src = masked_user_access_begin(src); to do the user address sanity using a data-dependent mask instead of the more traditional conditional if (user_read_access_begin(src, len)) { model. This model only works for dense accesses that start at 'src' and on architectures that have a guard region that is guaranteed to fault in between the user space and the kernel space area. With this, the user access doesn't need to be manually checked, because a bad address is guaranteed to fault (by some architecture masking trick: on x86-64 this involves just turning an invalid user address into all ones, since we don't map the top of address space). This only converts a couple of examples for now. Example x86-64 code generation for loading two words from user space: stac mov %rax,%rcx sar $0x3f,%rcx or %rax,%rcx mov (%rcx),%r13 mov 0x8(%rcx),%r14 clac where all the error handling and -EFAULT is now purely handled out of line by the exception path. Of course, if the micro-architecture does badly at 'clac' and 'stac', the above is still pitifully slow. But at least we did as well as we could. 
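A sketch of the conversion pattern described above, reading a single word from user space. The surrounding function is illustrative only; can_do_masked_user_access(), masked_user_access_begin() and the unsafe_get_user()/user_read_access_end() pairing follow the pattern given in the message:

/* Illustrative only: use the masked-address fast path when the
 * architecture provides it, else fall back to the traditional
 * conditional user_read_access_begin(). */
static int read_user_word(const unsigned long __user *from, unsigned long *val)
{
	if (can_do_masked_user_access())
		from = masked_user_access_begin(from);
	else if (!user_read_access_begin(from, sizeof(*from)))
		return -EFAULT;

	unsafe_get_user(*val, from, Efault);
	user_read_access_end();
	return 0;

Efault:
	user_read_access_end();
	return -EFAULT;
}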
Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 3064314f4832..f18371f6cf36 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -32,6 +32,13 @@ }) #endif +#ifdef masked_user_access_begin + #define can_do_masked_user_access() 1 +#else + #define can_do_masked_user_access() 0 + #define masked_user_access_begin(src) NULL +#endif + /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and -- cgit v1.2.3 From 8dffaec34dd55473adcbc924a4c9b04aaa0d4278 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 Aug 2024 12:18:23 -1000 Subject: workqueue: Fix htmldocs build warning Fix htmldocs build warning introduced by ec0a7d44b358 ("workqueue: Add interface for user-defined workqueue lockdep map"). Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell Cc: Matthew Brost --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 8ccbf510880b..04dff07a9e2b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -534,7 +534,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @lockdep_map: user-defined lockdep_map - * @args: args for @fmt + * @...: args for @fmt * * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. * Useful for workqueues created with the same purpose and to avoid leaking a -- cgit v1.2.3 From 7f6287417baf57754f47687c6ea1a749a0686ab0 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 19 Aug 2024 18:28:05 +0200 Subject: bpf: Allow bpf_current_task_under_cgroup() with BPF_CGROUP_* The helper bpf_current_task_under_cgroup() currently is only allowed for tracing programs, allow its usage also in the BPF_CGROUP_* program types. Move the code from kernel/trace/bpf_trace.c to kernel/bpf/helpers.c, so it compiles also without CONFIG_BPF_EVENTS. This will be used in systemd-networkd to monitor the sysctl writes, and filter it's own writes from others: https://github.com/systemd/systemd/pull/32212 Signed-off-by: Matteo Croce Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240819162805.78235-3-technoboy85@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b9425e410bcb..f0192c173ed8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3206,6 +3206,7 @@ extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto; +extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; -- cgit v1.2.3 From 807067bf014d4a3ae2cc55bd3de16f22a01eb580 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Aug 2024 15:04:37 -0700 Subject: kcm: Serialise kcm_sendmsg() for the same socket. 
syzkaller reported UAF in kcm_release(). [0] The scenario is 1. Thread A builds a skb with MSG_MORE and sets kcm->seq_skb. 2. Thread A resumes building skb from kcm->seq_skb but is blocked by sk_stream_wait_memory() 3. Thread B calls sendmsg() concurrently, finishes building kcm->seq_skb and puts the skb to the write queue 4. Thread A faces an error and finally frees skb that is already in the write queue 5. kcm_release() does double-free the skb in the write queue When a thread is building a MSG_MORE skb, another thread must not touch it. Let's add a per-sk mutex and serialise kcm_sendmsg(). [0]: BUG: KASAN: slab-use-after-free in __skb_unlink include/linux/skbuff.h:2366 [inline] BUG: KASAN: slab-use-after-free in __skb_dequeue include/linux/skbuff.h:2385 [inline] BUG: KASAN: slab-use-after-free in __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] BUG: KASAN: slab-use-after-free in __skb_queue_purge include/linux/skbuff.h:3181 [inline] BUG: KASAN: slab-use-after-free in kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 Read of size 8 at addr ffff0000ced0fc80 by task syz-executor329/6167 CPU: 1 PID: 6167 Comm: syz-executor329 Tainted: G B 6.8.0-rc5-syzkaller-g9abbc24128bc #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call trace: dump_backtrace+0x1b8/0x1e4 arch/arm64/kernel/stacktrace.c:291 show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:298 __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd0/0x124 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0x178/0x518 mm/kasan/report.c:488 kasan_report+0xd8/0x138 mm/kasan/report.c:601 __asan_report_load8_noabort+0x20/0x2c mm/kasan/report_generic.c:381 __skb_unlink include/linux/skbuff.h:2366 [inline] __skb_dequeue include/linux/skbuff.h:2385 [inline] __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] __skb_queue_purge include/linux/skbuff.h:3181 [inline] kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 __sock_release net/socket.c:659 [inline] sock_close+0xa4/0x1e8 net/socket.c:1421 __fput+0x30c/0x738 fs/file_table.c:376 ____fput+0x20/0x30 fs/file_table.c:404 task_work_run+0x230/0x2e0 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x618/0x1f64 kernel/exit.c:871 do_group_exit+0x194/0x22c kernel/exit.c:1020 get_signal+0x1500/0x15ec kernel/signal.c:2893 do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Allocated by task 6166: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x70/0x84 mm/kasan/generic.c:626 unpoison_slab_object mm/kasan/common.c:314 [inline] __kasan_slab_alloc+0x74/0x8c mm/kasan/common.c:340 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3813 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x204/0x4c0 mm/slub.c:3903 __alloc_skb+0x19c/0x3d8 net/core/skbuff.c:641 alloc_skb include/linux/skbuff.h:1296 [inline] kcm_sendmsg+0x1d3c/0x2124 net/kcm/kcmsock.c:783 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_sendmsg+0x220/0x2c0 net/socket.c:768 
splice_to_socket+0x7cc/0xd58 fs/splice.c:889 do_splice_from fs/splice.c:941 [inline] direct_splice_actor+0xec/0x1d8 fs/splice.c:1164 splice_direct_to_actor+0x438/0xa0c fs/splice.c:1108 do_splice_direct_actor fs/splice.c:1207 [inline] do_splice_direct+0x1e4/0x304 fs/splice.c:1233 do_sendfile+0x460/0xb3c fs/read_write.c:1295 __do_sys_sendfile64 fs/read_write.c:1362 [inline] __se_sys_sendfile64 fs/read_write.c:1348 [inline] __arm64_sys_sendfile64+0x160/0x3b4 fs/read_write.c:1348 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Freed by task 6167: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_free_info+0x5c/0x74 mm/kasan/generic.c:640 poison_slab_object+0x124/0x18c mm/kasan/common.c:241 __kasan_slab_free+0x3c/0x78 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kmem_cache_free+0x15c/0x3d4 mm/slub.c:4363 kfree_skbmem+0x10c/0x19c __kfree_skb net/core/skbuff.c:1109 [inline] kfree_skb_reason+0x240/0x6f4 net/core/skbuff.c:1144 kfree_skb include/linux/skbuff.h:1244 [inline] kcm_release+0x104/0x4c8 net/kcm/kcmsock.c:1685 __sock_release net/socket.c:659 [inline] sock_close+0xa4/0x1e8 net/socket.c:1421 __fput+0x30c/0x738 fs/file_table.c:376 ____fput+0x20/0x30 fs/file_table.c:404 task_work_run+0x230/0x2e0 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x618/0x1f64 kernel/exit.c:871 do_group_exit+0x194/0x22c kernel/exit.c:1020 get_signal+0x1500/0x15ec kernel/signal.c:2893 do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 The buggy address belongs to the object at ffff0000ced0fc80 which belongs to the cache skbuff_head_cache of size 240 The buggy address is located 0 bytes inside of freed 240-byte region [ffff0000ced0fc80, ffff0000ced0fd70) The buggy address belongs to the physical page: page:00000000d35f4ae4 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10ed0f flags: 0x5ffc00000000800(slab|node=0|zone=2|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 05ffc00000000800 ffff0000c1cbf640 fffffdffc3423100 dead000000000004 raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff0000ced0fb80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff0000ced0fc00: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff0000ced0fc80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff0000ced0fd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc ffff0000ced0fd80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com Closes: 
https://syzkaller.appspot.com/bug?extid=b72d86aa5df17ce74c60 Tested-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240815220437.69511-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/kcm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/kcm.h b/include/net/kcm.h index 90279e5e09a5..441e993be634 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -70,6 +70,7 @@ struct kcm_sock { struct work_struct tx_work; struct list_head wait_psock_list; struct sk_buff *seq_skb; + struct mutex tx_mutex; u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ -- cgit v1.2.3 From 0311507792b54069ac72e0a6c6b35c5d40aadad8 Mon Sep 17 00:00:00 2001 From: Deven Bowers Date: Fri, 2 Aug 2024 23:08:15 -0700 Subject: lsm: add IPE lsm Integrity Policy Enforcement (IPE) is an LSM that provides an complimentary approach to Mandatory Access Control than existing LSMs today. Existing LSMs have centered around the concept of access to a resource should be controlled by the current user's credentials. IPE's approach, is that access to a resource should be controlled by the system's trust of a current resource. The basis of this approach is defining a global policy to specify which resource can be trusted. Signed-off-by: Deven Bowers Signed-off-by: Fan Wu [PM: subject line tweak] Signed-off-by: Paul Moore --- include/uapi/linux/lsm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h index 33d8c9f4aa6b..938593dfd5da 100644 --- a/include/uapi/linux/lsm.h +++ b/include/uapi/linux/lsm.h @@ -64,6 +64,7 @@ struct lsm_ctx { #define LSM_ID_LANDLOCK 110 #define LSM_ID_IMA 111 #define LSM_ID_EVM 112 +#define LSM_ID_IPE 113 /* * LSM_ATTR_XXX definitions identify different LSM attributes -- cgit v1.2.3 From 6ad8bc92a47702f0b5d0b96b680199910b89f688 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 15 Aug 2024 13:21:14 -0400 Subject: net: add copy from skb_seq_state to buffer function Add an skb helper function to copy a range of bytes from within an existing skb_seq_state. 
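A hedged usage sketch of the new helper (hypothetical caller; it assumes skb_copy_seq_read() returns 0 on success and a negative errno on a short read, which the declaration alone does not spell out):

/* Illustrative only: pull a fixed-size header at 'offset' into a local
 * buffer without linearising the skb. */
static int peek_header(struct sk_buff *skb, unsigned int offset,
		       void *hdr, unsigned int hdrlen)
{
	struct skb_seq_state st;
	int err;

	skb_prepare_seq_read(skb, 0, skb->len, &st);
	err = skb_copy_seq_read(&st, offset, hdr, hdrlen);
	skb_abort_seq_read(&st);

	return err;
}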
Signed-off-by: Christian Hopps Signed-off-by: Steffen Klassert --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 29c3ea5b6e93..a871533b8568 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1433,6 +1433,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st); void skb_abort_seq_read(struct skb_seq_state *st); +int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config); -- cgit v1.2.3 From 77c5e7b623032502ee49fe7e7868eaca6786d7ed Mon Sep 17 00:00:00 2001 From: Finley Xiao Date: Wed, 14 Aug 2024 18:26:41 -0400 Subject: dt-bindings: power: Add support for RK3576 SoC Define power domain IDs as described in the TRM and add compatible for rockchip,rk3576-power-controller Signed-off-by: Finley Xiao Co-Developed-by: Detlev Casanova Signed-off-by: Detlev Casanova Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240814222824.3170-2-detlev.casanova@collabora.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/rockchip,rk3576-power.h | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/dt-bindings/power/rockchip,rk3576-power.h (limited to 'include') diff --git a/include/dt-bindings/power/rockchip,rk3576-power.h b/include/dt-bindings/power/rockchip,rk3576-power.h new file mode 100644 index 000000000000..324a056aa851 --- /dev/null +++ b/include/dt-bindings/power/rockchip,rk3576-power.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +#ifndef __DT_BINDINGS_POWER_RK3576_POWER_H__ +#define __DT_BINDINGS_POWER_RK3576_POWER_H__ + +/* VD_NPU */ +#define RK3576_PD_NPU 0 +#define RK3576_PD_NPUTOP 1 +#define RK3576_PD_NPU0 2 +#define RK3576_PD_NPU1 3 + +/* VD_GPU */ +#define RK3576_PD_GPU 4 + +/* VD_LOGIC */ +#define RK3576_PD_NVM 5 +#define RK3576_PD_SDGMAC 6 +#define RK3576_PD_USB 7 +#define RK3576_PD_PHP 8 +#define RK3576_PD_SUBPHP 9 +#define RK3576_PD_AUDIO 10 +#define RK3576_PD_VEPU0 11 +#define RK3576_PD_VEPU1 12 +#define RK3576_PD_VPU 13 +#define RK3576_PD_VDEC 14 +#define RK3576_PD_VI 15 +#define RK3576_PD_VO0 16 +#define RK3576_PD_VO1 17 +#define RK3576_PD_VOP 18 + +#endif -- cgit v1.2.3 From b6cee6544d01b8ac01232d343b1b2b594d94d61c Mon Sep 17 00:00:00 2001 From: Dikshita Agarwal Date: Mon, 19 Aug 2024 15:59:09 +0530 Subject: PM: domains: add device managed version of dev_pm_domain_attach|detach_list() Add the devres-enabled version of dev_pm_domain_attach|detach_list. If client drivers use devm_pm_domain_attach_list() to attach the PM domains, devm_pm_domain_detach_list() will be invoked implicitly during remove phase. 
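A sketch of how a consumer driver might use the devres variant; the driver, probe function and domain names are made up, only devm_pm_domain_attach_list() and struct dev_pm_domain_attach_data come from the API:

/* Illustrative probe: attach two power domains and rely on devres to
 * run dev_pm_domain_detach_list() automatically on driver removal. */
static int foo_probe(struct platform_device *pdev)
{
	static const char * const pd_names[] = { "core", "vcodec" };
	const struct dev_pm_domain_attach_data pd_data = {
		.pd_names = pd_names,
		.num_pd_names = ARRAY_SIZE(pd_names),
	};
	struct dev_pm_domain_list *pd_list;
	int ret;

	ret = devm_pm_domain_attach_list(&pdev->dev, &pd_data, &pd_list);
	if (ret < 0)
		return ret;

	/* no explicit detach needed in remove() or on error paths */
	return 0;
}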
Signed-off-by: Dikshita Agarwal Link: https://lore.kernel.org/r/1724063350-11993-2-git-send-email-quic_dikshita@quicinc.com Signed-off-by: Ulf Hansson --- include/linux/pm_domain.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b86bb52858ac..b637ec14025f 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -476,6 +476,9 @@ struct device *dev_pm_domain_attach_by_name(struct device *dev, int dev_pm_domain_attach_list(struct device *dev, const struct dev_pm_domain_attach_data *data, struct dev_pm_domain_list **list); +int devm_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list); void dev_pm_domain_detach(struct device *dev, bool power_off); void dev_pm_domain_detach_list(struct dev_pm_domain_list *list); int dev_pm_domain_start(struct device *dev); @@ -502,6 +505,14 @@ static inline int dev_pm_domain_attach_list(struct device *dev, { return 0; } + +static inline int devm_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list) +{ + return 0; +} + static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} static inline void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) {} static inline int dev_pm_domain_start(struct device *dev) -- cgit v1.2.3 From d386d59b7c1a39112ca875327339ed519df2b96c Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Wed, 14 Aug 2024 21:08:26 +0800 Subject: net/smc: introduce statistics for allocated ringbufs of link group Currently we have the statistics on sndbuf/RMB sizes of all connections that have ever been on the link group, namely smc_stats_memsize. However these statistics are incremental and since the ringbufs of link group are allowed to be reused, we cannot know the actual allocated buffers through these. So here introduces the statistic on actual allocated ringbufs of the link group, it will be incremented when a new ringbuf is added into buf_list and decremented when it is deleted from buf_list. Signed-off-by: Wen Gu Signed-off-by: Paolo Abeni --- include/uapi/linux/smc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index b531e3ef011a..9b74ef79070a 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -127,6 +127,8 @@ enum { SMC_NLA_LGR_R_NET_COOKIE, /* u64 */ SMC_NLA_LGR_R_PAD, /* flag */ SMC_NLA_LGR_R_BUF_TYPE, /* u8 */ + SMC_NLA_LGR_R_SNDBUF_ALLOC, /* uint */ + SMC_NLA_LGR_R_RMB_ALLOC, /* uint */ __SMC_NLA_LGR_R_MAX, SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1 }; @@ -162,6 +164,8 @@ enum { SMC_NLA_LGR_D_V2_COMMON, /* nest */ SMC_NLA_LGR_D_EXT_GID, /* u64 */ SMC_NLA_LGR_D_PEER_EXT_GID, /* u64 */ + SMC_NLA_LGR_D_SNDBUF_ALLOC, /* uint */ + SMC_NLA_LGR_D_DMB_ALLOC, /* uint */ __SMC_NLA_LGR_D_MAX, SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1 }; -- cgit v1.2.3 From e0d103542b06c36240e3887edfe49578464866eb Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Wed, 14 Aug 2024 21:08:27 +0800 Subject: net/smc: introduce statistics for ringbufs usage of net namespace The buffer size histograms in smc_stats, namely rx/tx_rmbsize, record the sizes of ringbufs for all connections that have ever appeared in the net namespace. They are incremental and we cannot know the actual ringbufs usage from these. 
So here introduces statistics for current ringbufs usage of existing smc connections in the net namespace into smc_stats, it will be incremented when new connection uses a ringbuf and decremented when the ringbuf is unused. Signed-off-by: Wen Gu Signed-off-by: Paolo Abeni --- include/uapi/linux/smc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 9b74ef79070a..1f58cb0c266b 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -253,6 +253,8 @@ enum { SMC_NLA_STATS_T_TX_BYTES, /* u64 */ SMC_NLA_STATS_T_RX_CNT, /* u64 */ SMC_NLA_STATS_T_TX_CNT, /* u64 */ + SMC_NLA_STATS_T_RX_RMB_USAGE, /* uint */ + SMC_NLA_STATS_T_TX_RMB_USAGE, /* uint */ __SMC_NLA_STATS_T_MAX, SMC_NLA_STATS_T_MAX = __SMC_NLA_STATS_T_MAX - 1 }; -- cgit v1.2.3 From 7ea0522ef81a335c2d3a0ab1c8a4fab9a23c4a03 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Aug 2024 11:56:12 +0200 Subject: netfilter: nf_tables: pass context structure to nft_parse_register_load Mechanical transformation, no logical changes intended. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2be4738eae1c..573995e7a27e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -254,7 +254,8 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); -int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); +int nft_parse_register_load(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *sreg, u32 len); int nft_parse_register_store(const struct nft_ctx *ctx, const struct nlattr *attr, u8 *dreg, const struct nft_data *data, -- cgit v1.2.3 From 14fb07130c7ddd257e30079b87499b3f89097b09 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Aug 2024 11:56:13 +0200 Subject: netfilter: nf_tables: allow loads only when register is initialized Reject rules where a load occurs from a register that has not seen a store early in the same rule. commit 4c905f6740a3 ("netfilter: nf_tables: initialize registers in nft_do_chain()") had to add a unconditional memset to the nftables register space to avoid leaking stack information to userspace. This memset shows up in benchmarks. After this change, this commit can be reverted again. Note that this breaks userspace compatibility, because theoretically you can do rule 1: reg2 := meta load iif, reg2 == 1 jump ... rule 2: reg2 == 2 jump ... // read access with no store in this rule ... after this change this is rejected. Neither nftables nor iptables-nft generate such rules, each rule is always standalone. This resuts in a small increase of nft_ctx structure by sizeof(long). To cope with hypothetical rulesets like the example above one could emit on-demand "reg[x] = 0" store when generating the datapath blob in nf_tables_commit_chain_prepare(). A patch that does this is linked to below. For now, lets disable this. In nf_tables, a rule is the smallest unit that can be replaced from userspace, i.e. a hypothetical ruleset that relies on earlier initialisations of registers can't be changed at will as register usage would need to be coordinated. 
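A simplified sketch of the kind of check the new reg_inited bitmap enables inside nft_parse_register_load(); the exact implementation may differ, this is only an assumption drawn from the description above:

/* Reject a load from any 32-bit register slot that no earlier
 * expression in the same rule has stored to. */
static int nft_check_reg_inited(const struct nft_ctx *ctx, u32 reg, u32 len)
{
	u32 i, nregs = DIV_ROUND_UP(len, NFT_REG32_SIZE);

	for (i = 0; i < nregs; i++) {
		if (!test_bit(reg + i, ctx->reg_inited))
			return -ENODATA;
	}
	return 0;
}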
Link: https://lore.kernel.org/netfilter-devel/20240627135330.17039-4-fw@strlen.de/ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 573995e7a27e..1cc33d946d41 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -221,6 +221,7 @@ struct nft_ctx { u8 family; u8 level; bool report; + DECLARE_BITMAP(reg_inited, NFT_REG32_NUM); }; enum nft_data_desc_flags { -- cgit v1.2.3 From ea72ce5da22806d5713f3ffb39a6d5ae73841f93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Aug 2024 00:29:36 +0200 Subject: x86/kaslr: Expose and use the end of the physical memory address space iounmap() on x86 occasionally fails to unmap because the provided valid ioremap address is not below high_memory. It turned out that this happens due to KASLR. KASLR uses the full address space between PAGE_OFFSET and vaddr_end to randomize the starting points of the direct map, vmalloc and vmemmap regions. It thereby limits the size of the direct map by using the installed memory size plus an extra configurable margin for hot-plug memory. This limitation is done to gain more randomization space because otherwise only the holes between the direct map, vmalloc, vmemmap and vaddr_end would be usable for randomizing. The limited direct map size is not exposed to the rest of the kernel, so the memory hot-plug and resource management related code paths still operate under the assumption that the available address space can be determined with MAX_PHYSMEM_BITS. request_free_mem_region() allocates from (1 << MAX_PHYSMEM_BITS) - 1 downwards. That means the first allocation happens past the end of the direct map and if unlucky this address is in the vmalloc space, which causes high_memory to become greater than VMALLOC_START and consequently causes iounmap() to fail for valid ioremap addresses. MAX_PHYSMEM_BITS cannot be changed for that because the randomization does not align with address bit boundaries and there are other places which actually require to know the maximum number of address bits. All remaining usage sites of MAX_PHYSMEM_BITS have been analyzed and found to be correct. Cure this by exposing the end of the direct map via PHYSMEM_END and use that for the memory hot-plug and resource management related places instead of relying on MAX_PHYSMEM_BITS. In the KASLR case PHYSMEM_END maps to a variable which is initialized by the KASLR initialization and otherwise it is based on MAX_PHYSMEM_BITS as before. To prevent future hickups add a check into add_pages() to catch callers trying to add memory above PHYSMEM_END. 
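The add_pages() check mentioned above might look roughly like the sketch below; the precise form in arch/x86/mm/init_64.c is not part of this hunk, so treat it as an assumption:

int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
	      struct mhp_params *params)
{
	unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1;

	if (WARN_ON_ONCE(end > PHYSMEM_END))
		return -ERANGE;

	/* existing __add_pages() call and max_pfn bookkeeping follow */
	return __add_pages(nid, start_pfn, nr_pages, params);
}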
Fixes: 0483e1fa6e09 ("x86/mm: Implement ASLR for kernel memory regions") Reported-by: Max Ramanouski Reported-by: Alistair Popple Signed-off-by: Thomas Gleixner Tested-By: Max Ramanouski Tested-by: Alistair Popple Reviewed-by: Dan Williams Reviewed-by: Alistair Popple Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/87ed6soy3z.ffs@tglx --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..b3864156eaa4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -97,6 +97,10 @@ extern const int mmap_rnd_compat_bits_max; extern int mmap_rnd_compat_bits __read_mostly; #endif +#ifndef PHYSMEM_END +# define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) +#endif + #include #include -- cgit v1.2.3 From 1fa3314c14c6a20d098991a0a6980f9b18b2f930 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 14 Aug 2024 15:52:24 +0300 Subject: ipv4: Centralize TOS matching The TOS field in the IPv4 flow information structure ('flowi4_tos') is matched by the kernel against the TOS selector in IPv4 rules and routes. The field is initialized differently by different call sites. Some treat it as DSCP (RFC 2474) and initialize all six DSCP bits, some treat it as RFC 1349 TOS and initialize it using RT_TOS() and some treat it as RFC 791 TOS and initialize it using IPTOS_RT_MASK. What is common to all these call sites is that they all initialize the lower three DSCP bits, which fits the TOS definition in the initial IPv4 specification (RFC 791). Therefore, the kernel only allows configuring IPv4 FIB rules that match on the lower three DSCP bits which are always guaranteed to be initialized by all call sites: # ip -4 rule add tos 0x1c table 100 # ip -4 rule add tos 0x3c table 100 Error: Invalid tos. While this works, it is unlikely to be very useful. RFC 791 that initially defined the TOS and IP precedence fields was updated by RFC 2474 over twenty five years ago where these fields were replaced by a single six bits DSCP field. Extending FIB rules to match on DSCP can be done by adding a new DSCP selector while maintaining the existing semantics of the TOS selector for applications that rely on that. A prerequisite for allowing FIB rules to match on DSCP is to adjust all the call sites to initialize the high order DSCP bits and remove their masking along the path to the core where the field is matched on. However, making this change alone will result in a behavior change. For example, a forwarded IPv4 packet with a DS field of 0xfc will no longer match a FIB rule that was configured with 'tos 0x1c'. This behavior change can be avoided by masking the upper three DSCP bits in 'flowi4_tos' before comparing it against the TOS selectors in FIB rules and routes. Implement the above by adding a new function that checks whether a given DSCP value matches the one specified in the IPv4 flow information structure and invoke it from the three places that currently match on 'flowi4_tos'. Use RT_TOS() for the masking of 'flowi4_tos' instead of IPTOS_RT_MASK since the latter is not uAPI and we should be able to remove it at some point. Include in since the former defines IPTOS_TOS_MASK which is used in the definition of RT_TOS() in . No regressions in FIB tests: # ./fib_tests.sh [...] Tests passed: 218 Tests failed: 0 And FIB rule tests: # ./fib_rule_tests.sh [...] 
Tests passed: 116 Tests failed: 0 Signed-off-by: Ido Schimmel Signed-off-by: Paolo Abeni --- include/net/ip_fib.h | 6 ++++++ include/uapi/linux/in_route.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 72af2f223e59..269ec10f63e4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -22,6 +22,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -434,6 +435,11 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, #endif /* CONFIG_IP_MULTIPLE_TABLES */ +static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4) +{ + return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos)); +} + /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); diff --git a/include/uapi/linux/in_route.h b/include/uapi/linux/in_route.h index 0cc2c23b47f8..10bdd7e7107f 100644 --- a/include/uapi/linux/in_route.h +++ b/include/uapi/linux/in_route.h @@ -2,6 +2,8 @@ #ifndef _LINUX_IN_ROUTE_H #define _LINUX_IN_ROUTE_H +#include + /* IPv4 routing cache flags */ #define RTCF_DEAD RTNH_F_DEAD -- cgit v1.2.3 From acb0184fe9bca3bb7103dadc76ba9d38c969ca86 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:53 +0100 Subject: coresight: Move struct coresight_trace_id_map to common header The trace ID maps will need to be created and stored by the core and Perf code so move the definition up to the common header. Reviewed-by: Anshuman Khandual Reviewed-by: Mike Leach Signed-off-by: James Clark Tested-by: Leo Yan Tested-by: Ganapatrao Kulkarni Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-12-james.clark@linaro.org --- include/linux/coresight.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index f09ace92176e..c16c61a8411d 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -218,6 +218,24 @@ struct coresight_sysfs_link { const char *target_name; }; +/* architecturally we have 128 IDs some of which are reserved */ +#define CORESIGHT_TRACE_IDS_MAX 128 + +/** + * Trace ID map. + * + * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs. + * Initialised so that the reserved IDs are permanently marked as + * in use. + * @pend_rel_ids: CPU IDs that have been released by the trace source but not + * yet marked as available, to allow re-allocation to the same + * CPU during a perf session. + */ +struct coresight_trace_id_map { + DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); + DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX); +}; + /** * struct coresight_device - representation of a device as used by the framework * @pdata: Platform data with device connections associated to this device. -- cgit v1.2.3 From d53c8253c7822fbc524adcd950e7c6de6a229c99 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:55 +0100 Subject: coresight: Make CPU id map a property of a trace ID map The global CPU ID mappings won't work for per-sink ID maps so move it to the ID map struct. coresight_trace_id_release_all_pending() is hard coded to operate on the default map, but once Perf sessions use their own maps the pending release mechanism will be deleted. So it doesn't need to be extended to accept a trace ID map argument at this point. 
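A rough sketch of what moving the CPU map into the ID map implies for its users; the helper names are illustrative, only the cpu_map member and its atomic_t __percpu type come from the diff:

/* Illustrative only: each coresight_trace_id_map now owns its own
 * per-CPU "current trace ID" slots instead of one global array. */
static int trace_id_map_init_cpu_map(struct coresight_trace_id_map *id_map)
{
	id_map->cpu_map = alloc_percpu(atomic_t);
	return id_map->cpu_map ? 0 : -ENOMEM;
}

static int trace_id_map_read_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
{
	return atomic_read(per_cpu_ptr(id_map->cpu_map, cpu));
}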
Signed-off-by: James Clark Reviewed-by: Mike Leach Tested-by: Leo Yan Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-14-james.clark@linaro.org --- include/linux/coresight.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index c16c61a8411d..7d62b88bfb5c 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -234,6 +234,7 @@ struct coresight_sysfs_link { struct coresight_trace_id_map { DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX); + atomic_t __percpu *cpu_map; }; /** -- cgit v1.2.3 From 5ad628a7617607baac53745f8025638b967470a2 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:56 +0100 Subject: coresight: Use per-sink trace ID maps for Perf sessions This will allow sessions with more than CORESIGHT_TRACE_IDS_MAX ETMs as long as there are fewer than that many ETMs connected to each sink. Each sink owns its own trace ID map, and any Perf session connecting to that sink will allocate from it, even if the sink is currently in use by other users. This is similar to the existing behavior where the dynamic trace IDs are constant as long as there is any concurrent Perf session active. It's not completely optimal because slightly more IDs will be used than necessary, but the optimal solution involves tracking the PIDs of each session and allocating ID maps based on the session owner. This is difficult to do with the combination of per-thread and per-cpu modes and some scheduling issues. The complexity of this isn't likely to worth it because even with multiple users they'd just see a difference in the ordering of ID allocations rather than hitting any limits (unless the hardware does have too many ETMs connected to one sink). Signed-off-by: James Clark Reviewed-by: Mike Leach Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-15-james.clark@linaro.org --- include/linux/coresight.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 7d62b88bfb5c..9c3067e2e38b 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -290,6 +290,7 @@ struct coresight_device { bool sysfs_sink_activated; struct dev_ext_attribute *ea; struct coresight_device *def_sink; + struct coresight_trace_id_map perf_sink_id_map; /* sysfs links between components */ int nr_links; bool has_conns_grp; @@ -384,7 +385,7 @@ struct coresight_ops_link { struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, struct perf_event *event, - enum cs_mode mode); + enum cs_mode mode, struct coresight_trace_id_map *id_map); void (*disable)(struct coresight_device *csdev, struct perf_event *event); }; -- cgit v1.2.3 From de0029fdde86092c75472c92e56a962f4edee0f6 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:57 +0100 Subject: coresight: Remove pending trace ID release mechanism Pending the release of IDs was a way of managing concurrent sysfs and Perf sessions in a single global ID map. Perf may have finished while sysfs hadn't, and Perf shouldn't release the IDs in use by sysfs and vice versa. 
Now that Perf uses its own exclusive ID maps, pending release doesn't result in any different behavior than just releasing all IDs when the last Perf session finishes. As part of the per-sink trace ID change, we would have still had to make the pending mechanism work on a per-sink basis, due to the overlapping ID allocations, so instead of making that more complicated, just remove it. Signed-off-by: James Clark Reviewed-by: Mike Leach Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-16-james.clark@linaro.org --- include/linux/coresight.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 9c3067e2e38b..197949fd2c35 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -227,14 +227,12 @@ struct coresight_sysfs_link { * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs. * Initialised so that the reserved IDs are permanently marked as * in use. - * @pend_rel_ids: CPU IDs that have been released by the trace source but not - * yet marked as available, to allow re-allocation to the same - * CPU during a perf session. + * @perf_cs_etm_session_active: Number of Perf sessions using this ID map. */ struct coresight_trace_id_map { DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); - DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX); atomic_t __percpu *cpu_map; + atomic_t perf_cs_etm_session_active; }; /** -- cgit v1.2.3 From 487eec8da80aef16229d30429f1b26090b1bf0eb Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:58 +0100 Subject: coresight: Emit sink ID in the HW_ID packets For Perf to be able to decode when per-sink trace IDs are used, emit the sink that's being written to for each ETM. Perf currently errors out if it sees a newer packet version so instead of bumping it, add a new minor version field. This can be used to signify new versions that have backwards compatible fields. Considering this change is only for high core count machines, it doesn't make sense to make a breaking change for everyone. Signed-off-by: James Clark Tested-by: Leo Yan Reviewed-by: Mike Leach Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-17-james.clark@linaro.org --- include/linux/coresight-pmu.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 51ac441a37c3..89b0ac0014b0 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -49,12 +49,21 @@ * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. * Used to associate a CPU with the CoreSight Trace ID. * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. - * [59:08] - Unused (SBZ) - * [63:60] - Version + * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/ + * Added in minor version 1. + * [55:40] - Unused (SBZ) + * [59:56] - Minor Version - previously existing fields are compatible with + * all minor versions. + * [63:60] - Major Version - previously existing fields mean different things + * in new major versions. 
*/ #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8) -#define CS_AUX_HW_ID_CURR_VERSION 0 +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56) +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60) + +#define CS_AUX_HW_ID_MAJOR_VERSION 0 +#define CS_AUX_HW_ID_MINOR_VERSION 1 #endif -- cgit v1.2.3 From 988d40a4d4e7d671305bea501562a5d1a1d479fa Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:59 +0100 Subject: coresight: Make trace ID map spinlock local to the map Reduce contention on the lock by replacing the global lock with one for each map. Signed-off-by: James Clark Reviewed-by: Mike Leach Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-18-james.clark@linaro.org --- include/linux/coresight.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 197949fd2c35..c13342594278 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -233,6 +233,7 @@ struct coresight_trace_id_map { DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); atomic_t __percpu *cpu_map; atomic_t perf_cs_etm_session_active; + spinlock_t lock; }; /** -- cgit v1.2.3 From 7c432a18ad216b4f7b08e93287586d60e12a3b7b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 20 Aug 2024 15:27:55 +0100 Subject: firmware: arm_ffa: Update the FF-A command list with v1.2 additions Arm Firmware Framework for A-profile(FFA) v1.2 introduces register based discovery mechanism and direct messaging extensions that enables to target specific UUID within a partition. Let us add all the newly supported FF-A function IDs in the spec. Also update to the error values and associated handling. Message-Id: <20240820-ffa_v1-2-v2-2-18c0c5f3c65e@arm.com> Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 89683f31ae12..b34f0c0dc2c5 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -73,6 +73,11 @@ #define FFA_FN64_MEM_PERM_GET FFA_SMC_64(0x88) #define FFA_MEM_PERM_SET FFA_SMC_32(0x89) #define FFA_FN64_MEM_PERM_SET FFA_SMC_64(0x89) +#define FFA_CONSOLE_LOG FFA_SMC_32(0x8A) +#define FFA_PARTITION_INFO_GET_REGS FFA_SMC_64(0x8B) +#define FFA_EL3_INTR_HANDLE FFA_SMC_32(0x8C) +#define FFA_MSG_SEND_DIRECT_REQ2 FFA_SMC_64(0x8D) +#define FFA_MSG_SEND_DIRECT_RESP2 FFA_SMC_64(0x8E) /* * For some calls it is necessary to use SMC64 to pass or return 64-bit values. -- cgit v1.2.3 From aaef3bc98129c86078b336f16788dd733b0728a4 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 20 Aug 2024 15:27:58 +0100 Subject: firmware: arm_ffa: Add support for FFA_MSG_SEND_DIRECT_{REQ,RESP}2 FFA_MSG_SEND_DIRECT_{REQ,RESP} supported only x3-x7 to pass implementation defined values as part of the message. This may not be sufficient sometimes and also it would be good to use all the registers supported by SMCCC v1.2 (x0-x17) for such register based communication. Also another limitation with the FFA_MSG_SEND_DIRECT_{REQ,RESP} is the ability to target a specific service within the partition based on it's UUID. 
In order to address both of the above limitation, FF-A v1.2 introduced FFA_MSG_SEND_DIRECT_{REQ,RESP}2 which has the ability to target the message to a specific service based on its UUID within a partition as well as utilise all the available registers(x4-x17 specifically) for the communication. This change adds support for FFA_MSG_SEND_DIRECT_REQ2 and FFA_MSG_SEND_DIRECT_RESP2. Message-Id: <20240820-ffa_v1-2-v2-5-18c0c5f3c65e@arm.com> Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index b34f0c0dc2c5..a28e2a6a13d0 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -270,6 +270,11 @@ struct ffa_indirect_msg_hdr { u32 size; }; +/* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP}2 which pass data via registers */ +struct ffa_send_direct_data2 { + unsigned long data[14]; /* x4-x17 */ +}; + struct ffa_mem_region_addr_range { /* The base IPA of the constituent memory region, aligned to 4 kiB */ u64 address; @@ -431,6 +436,8 @@ struct ffa_msg_ops { int (*sync_send_receive)(struct ffa_device *dev, struct ffa_send_direct_data *data); int (*indirect_send)(struct ffa_device *dev, void *buf, size_t sz); + int (*sync_send_receive2)(struct ffa_device *dev, const uuid_t *uuid, + struct ffa_send_direct_data2 *data); }; struct ffa_mem_ops { -- cgit v1.2.3 From e68ac2b488495fa4d127d6105ce633849859957a Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Thu, 15 Aug 2024 11:15:40 -0600 Subject: softirq: Remove unused 'action' parameter from action callback When soft interrupt actions are called, they are passed a pointer to the struct softirq action which contains the action's function pointer. This pointer isn't useful, as the action callback already knows what function it is. And since each callback handles a specific soft interrupt, the callback also knows which soft interrupt number is running. No soft interrupt action callback actually uses this parameter, so remove it from the function pointer signature. This clarifies that soft interrupt actions are global routines and makes it slightly cheaper to call them. Signed-off-by: Caleb Sander Mateos Signed-off-by: Thomas Gleixner Reviewed-by: Jens Axboe Link: https://lore.kernel.org/all/20240815171549.3260003-1-csander@purestorage.com --- include/linux/interrupt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 3f30c88e0b4c..694de61e0b38 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -594,7 +594,7 @@ extern const char * const softirq_to_name[NR_SOFTIRQS]; struct softirq_action { - void (*action)(struct softirq_action *); + void (*action)(void); }; asmlinkage void do_softirq(void); @@ -609,7 +609,7 @@ static inline void do_softirq_post_smp_call_flush(unsigned int unused) } #endif -extern void open_softirq(int nr, void (*action)(struct softirq_action *)); +extern void open_softirq(int nr, void (*action)(void)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); -- cgit v1.2.3 From 2fea0c26b82f304f43b3905e56d954cf98a6d0e9 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Fri, 2 Aug 2024 23:08:19 -0700 Subject: initramfs,lsm: add a security hook to do_populate_rootfs() This patch introduces a new hook to notify security system that the content of initramfs has been unpacked into the rootfs. 
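As a minimal sketch of how an LSM might consume such a hook (the my_lsm_* names are hypothetical; the hook array would be passed to security_add_hooks() during LSM initialization):

#include <linux/lsm_hooks.h>

/* Hypothetical LSM: remember that everything unpacked so far came from
 * the initramfs and can therefore be treated as trusted. */
static bool my_lsm_initramfs_done;

static void my_lsm_initramfs_populated(void)
{
	my_lsm_initramfs_done = true;
}

static struct security_hook_list my_lsm_hooks[] __ro_after_init = {
	LSM_HOOK_INIT(initramfs_populated, my_lsm_initramfs_populated),
};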
Upon receiving this notification, the security system can activate a policy to allow only files that originated from the initramfs to execute or load into kernel during the early stages of booting. This approach is crucial for minimizing the attack surface by ensuring that only trusted files from the initramfs are operational in the critical boot phase. Signed-off-by: Fan Wu [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 520730fe2d94..22a14fc794fe 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -449,3 +449,5 @@ LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ + +LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) diff --git a/include/linux/security.h b/include/linux/security.h index 62233fec8ead..3298855abdbc 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2256,4 +2256,12 @@ static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */ +#ifdef CONFIG_SECURITY +extern void security_initramfs_populated(void); +#else +static inline void security_initramfs_populated(void) +{ +} +#endif /* CONFIG_SECURITY */ + #endif /* ! __LINUX_SECURITY_H */ -- cgit v1.2.3 From 7138679ff2a2b1674f16618558d6cabea6ab2c53 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Fri, 2 Aug 2024 23:08:21 -0700 Subject: lsm: add new securityfs delete function When deleting a directory in the security file system, the existing securityfs_remove requires the directory to be empty, otherwise it will do nothing. This leads to a potential risk that the security file system might be in an unclean state when the intended deletion did not happen. This commit introduces a new function securityfs_recursive_remove to recursively delete a directory without leaving an unclean state. Co-developed-by: Christian Brauner (Microsoft) Signed-off-by: Fan Wu [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 3298855abdbc..f6d2bc69cfa6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2090,6 +2090,7 @@ struct dentry *securityfs_create_symlink(const char *name, const char *target, const struct inode_operations *iops); extern void securityfs_remove(struct dentry *dentry); +extern void securityfs_recursive_remove(struct dentry *dentry); #else /* CONFIG_SECURITYFS */ -- cgit v1.2.3 From f44554b5067b36c14cc91ed811fa1bd58baed34a Mon Sep 17 00:00:00 2001 From: Deven Bowers Date: Fri, 2 Aug 2024 23:08:23 -0700 Subject: audit,ipe: add IPE auditing support Users of IPE require a way to identify when and why an operation fails, allowing them to both respond to violations of policy and be notified of potentially malicious actions on their systems with respect to IPE itself. This patch introduces 3 new audit events. AUDIT_IPE_ACCESS(1420) indicates the result of an IPE policy evaluation of a resource. AUDIT_IPE_CONFIG_CHANGE(1421) indicates the current active IPE policy has been changed to another loaded policy. AUDIT_IPE_POLICY_LOAD(1422) indicates a new IPE policy has been loaded into the kernel. 
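For illustration, a record in the AUDIT_IPE_ACCESS style shown below could be emitted through the generic audit API roughly as follows (a hedged sketch, not the actual IPE implementation; the helper name and field list are simplified):

#include <linux/audit.h>
#include <linux/gfp.h>
#include <linux/sched.h>

/* Sketch only: emit an IPE-style access record via the audit subsystem. */
static void example_audit_ipe_access(const char *op, const char *hook,
				     bool enforcing, const char *rule)
{
	struct audit_buffer *ab;

	ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_IPE_ACCESS);
	if (!ab)
		return;

	audit_log_format(ab, "ipe_op=%s ipe_hook=%s enforcing=%d pid=%d",
			 op, hook, enforcing ? 1 : 0, task_pid_nr(current));
	audit_log_format(ab, " rule=\"%s\"", rule);
	audit_log_end(ab);
}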
This patch also adds support for success auditing, allowing users to identify why an allow decision was made for a resource. However, it is recommended to use this option with caution, as it is quite noisy. Here are some examples of the new audit record types: AUDIT_IPE_ACCESS(1420): audit: AUDIT1420 ipe_op=EXECUTE ipe_hook=BPRM_CHECK enforcing=1 pid=297 comm="sh" path="/root/vol/bin/hello" dev="tmpfs" ino=3897 rule="op=EXECUTE boot_verified=TRUE action=ALLOW" audit: AUDIT1420 ipe_op=EXECUTE ipe_hook=BPRM_CHECK enforcing=1 pid=299 comm="sh" path="/mnt/ipe/bin/hello" dev="dm-0" ino=2 rule="DEFAULT action=DENY" audit: AUDIT1420 ipe_op=EXECUTE ipe_hook=BPRM_CHECK enforcing=1 pid=300 path="/tmp/tmpdp2h1lub/deny/bin/hello" dev="tmpfs" ino=131 rule="DEFAULT action=DENY" The above three records were generated when the active IPE policy only allows binaries from the initramfs to run. Three identical `hello` binaries were placed at different locations; only the first hello, from the rootfs (initramfs), was allowed. Field ipe_op is followed by the IPE operation name associated with the log. Field ipe_hook is followed by the name of the LSM hook that triggered the IPE event. Field enforcing is followed by the enforcement state of IPE (it will be introduced in the next commit). Field pid is followed by the pid of the process that triggered the IPE event. Field comm is followed by the command line program name of the process that triggered the IPE event. Field path is followed by the file's path name. Field dev is followed by the device name as found in /dev where the file is from. Note that for device mappers it will use the name `dm-X` instead of the name in /dev/mapper. For a file in a temp file system, which is not from a device, it will use `tmpfs` for the field. The implementation of this part follows another existing use case, LSM_AUDIT_DATA_INODE, in security/lsm_audit.c. Field ino is followed by the file's inode number. Field rule is followed by the IPE rule that made the access decision. The whole rule must be audited because the decision is based on the combination of all property conditions in the rule. Along with the syscall audit event, users can see why a block happened. For example: audit: AUDIT1420 ipe_op=EXECUTE ipe_hook=BPRM_CHECK enforcing=1 pid=2138 comm="bash" path="/mnt/ipe/bin/hello" dev="dm-0" ino=2 rule="DEFAULT action=DENY" audit[1956]: SYSCALL arch=c000003e syscall=59 success=no exit=-13 a0=556790138df0 a1=556790135390 a2=5567901338b0 a3=ab2a41a67f4f1f4e items=1 ppid=147 pid=1956 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="bash" exe="/usr/bin/bash" key=(null) The above two records show that bash used execve to run "hello" and was blocked by IPE. Note that the IPE records always appear prior to the corresponding SYSCALL record. AUDIT_IPE_CONFIG_CHANGE(1421): audit: AUDIT1421 old_active_pol_name="Allow_All" old_active_pol_version=0.0.0 old_policy_digest=sha256:E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649 new_active_pol_name="boot_verified" new_active_pol_version=0.0.0 new_policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F auid=4294967295 ses=4294967295 lsm=ipe res=1 The above record shows the active IPE policy switching from `Allow_All` to `boot_verified`, along with the version and the hash digest of the two policies. Note that IPE can only have one policy active at a time; all access decision evaluation is based on the current active policy.
The normal procedure to deploy a policy is loading the policy to deploy into the kernel first, then switch the active policy to it. AUDIT_IPE_POLICY_LOAD(1422): audit: AUDIT1422 policy_name="boot_verified" policy_version=0.0.0 policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F2676 auid=4294967295 ses=4294967295 lsm=ipe res=1 The above record showed a new policy has been loaded into the kernel with the policy name, policy version and policy hash. Signed-off-by: Deven Bowers Signed-off-by: Fan Wu [PM: subject line tweak] Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index d676ed2b246e..75e21a135483 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -143,6 +143,9 @@ #define AUDIT_MAC_UNLBL_STCDEL 1417 /* NetLabel: del a static label */ #define AUDIT_MAC_CALIPSO_ADD 1418 /* NetLabel: add CALIPSO DOI entry */ #define AUDIT_MAC_CALIPSO_DEL 1419 /* NetLabel: del CALIPSO DOI entry */ +#define AUDIT_IPE_ACCESS 1420 /* IPE denial or grant */ +#define AUDIT_IPE_CONFIG_CHANGE 1421 /* IPE config change */ +#define AUDIT_IPE_POLICY_LOAD 1422 /* IPE policy load */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 -- cgit v1.2.3 From b55d26bd1891423a3cdccf816b386aec8bbefc87 Mon Sep 17 00:00:00 2001 From: Deven Bowers Date: Fri, 2 Aug 2024 23:08:25 -0700 Subject: block,lsm: add LSM blob and new LSM hooks for block devices This patch introduces a new LSM blob to the block_device structure, enabling the security subsystem to store security-sensitive data related to block devices. Currently, for a device mapper's mapped device containing a dm-verity target, critical security information such as the roothash and its signing state are not readily accessible. Specifically, while the dm-verity volume creation process passes the dm-verity roothash and its signature from userspace to the kernel, the roothash is stored privately within the dm-verity target, and its signature is discarded post-verification. This makes it extremely hard for the security subsystem to utilize these data. With the addition of the LSM blob to the block_device structure, the security subsystem can now retain and manage important security metadata such as the roothash and the signing state of a dm-verity by storing them inside the blob. Access decisions can then be based on these stored data. The implementation follows the same approach used for security blobs in other structures like struct file, struct inode, and struct superblock. The initialization of the security blob occurs after the creation of the struct block_device, performed by the security subsystem. Similarly, the security blob is freed by the security subsystem before the struct block_device is deallocated or freed. This patch also introduces a new hook security_bdev_setintegrity() to save block device's integrity data to the new LSM blob. For example, for dm-verity, it can use this hook to expose its roothash and signing state to LSMs, then LSMs can save these data into the LSM blob. Please note that the new hook should be invoked every time the security information is updated to keep these data current. For example, in dm-verity, if the mapping table is reloaded and configured to use a different dm-verity target with a new roothash and signing information, the previously stored data in the LSM blob will become obsolete. 
It is crucial to re-invoke the hook to refresh these data and ensure they are up to date. This necessity arises from the design of device-mapper, where a device-mapper device is first created, and then targets are subsequently loaded into it. These targets can be modified multiple times during the device's lifetime. Therefore, while the LSM blob is allocated during the creation of the block device, its actual contents are not initialized at this stage and can change substantially over time. This includes alterations from data that the LSM 'trusts' to those it does not, making it essential to handle these changes correctly. Failure to address this dynamic aspect could potentially allow for bypassing LSM checks. Signed-off-by: Deven Bowers Signed-off-by: Fan Wu [PM: merge fuzz, subject line tweaks] Signed-off-by: Paul Moore --- include/linux/blk_types.h | 3 +++ include/linux/lsm_hook_defs.h | 5 +++++ include/linux/lsm_hooks.h | 1 + include/linux/security.h | 26 ++++++++++++++++++++++++++ 4 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 36ed96133217..413ebdff974b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -71,6 +71,9 @@ struct block_device { struct partition_meta_info *bd_meta_info; int bd_writers; +#ifdef CONFIG_SECURITY + void *bd_security; +#endif /* * keep this out-of-line as it's both big and not needed in the fast * path diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 22a14fc794fe..860821f3bf6f 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -451,3 +451,8 @@ LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) + +LSM_HOOK(int, 0, bdev_alloc_security, struct block_device *bdev) +LSM_HOOK(void, LSM_RET_VOID, bdev_free_security, struct block_device *bdev) +LSM_HOOK(int, 0, bdev_setintegrity, struct block_device *bdev, + enum lsm_integrity_type type, const void *value, size_t size) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 11ea0063228f..4687985b9175 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -83,6 +83,7 @@ struct lsm_blob_sizes { int lbs_task; int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ int lbs_tun_dev; + int lbs_bdev; }; /* diff --git a/include/linux/security.h b/include/linux/security.h index f6d2bc69cfa6..d7cab2d5002f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -83,6 +83,10 @@ enum lsm_event { LSM_POLICY_CHANGE, }; +enum lsm_integrity_type { + __LSM_INT_MAX +}; + /* * These are reasons that can be passed to the security_locked_down() * LSM hook. 
Lockdown reasons that protect kernel integrity (ie, the @@ -509,6 +513,11 @@ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); +int security_bdev_alloc(struct block_device *bdev); +void security_bdev_free(struct block_device *bdev); +int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, const void *value, + size_t size); #else /* CONFIG_SECURITY */ static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@ -1483,6 +1492,23 @@ static inline int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, { return -EOPNOTSUPP; } + +static inline int security_bdev_alloc(struct block_device *bdev) +{ + return 0; +} + +static inline void security_bdev_free(struct block_device *bdev) +{ +} + +static inline int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + #endif /* CONFIG_SECURITY */ #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) -- cgit v1.2.3 From a6af7bc3d72ff52c5526a392144347fcb3094149 Mon Sep 17 00:00:00 2001 From: Deven Bowers Date: Fri, 2 Aug 2024 23:08:26 -0700 Subject: dm-verity: expose root hash digest and signature data to LSMs dm-verity provides a strong guarantee of a block device's integrity. As a generic way to check the integrity of a block device, it provides those integrity guarantees to its higher layers, including the filesystem level. However, critical security metadata like the dm-verity roothash and its signing information are not easily accessible to the LSMs. To address this limitation, this patch introduces a mechanism to store and manage these essential security details within a newly added LSM blob in the block_device structure. This addition allows LSMs to make access control decisions on the integrity data stored within the block_device, enabling more flexible security policies. For instance, LSMs can now revoke access to dm-verity devices based on their roothashes, ensuring that only authorized and verified content is accessible. Additionally, LSMs can enforce policies to only allow files from dm-verity devices that have a valid digital signature to execute, effectively blocking any unsigned files from execution, thus enhancing security against unauthorized modifications. The patch includes new hook calls, `security_bdev_setintegrity()`, in dm-verity to expose the dm-verity roothash and the roothash signature to LSMs via preresume() callback. By using the preresume() callback, it ensures that the security metadata is consistently in sync with the metadata of the dm-verity target in the current active mapping table. The hook calls are depended on CONFIG_SECURITY. 
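For illustration, an LSM consuming this data might implement its bdev_setintegrity hook roughly as follows, using the struct dm_verity_digest and LSM_INT_DMVERITY_ROOTHASH definitions added below (a sketch for a hypothetical my_lsm with a per-bdev blob reserved via lbs_bdev; only the root hash case is shown):

#include <linux/blk_types.h>
#include <linux/lsm_hooks.h>
#include <linux/security.h>
#include <linux/string.h>

/* Hypothetical per-bdev blob, reserved by setting .lbs_bdev below. */
struct my_lsm_bdev {
	u8 roothash[64];
	size_t roothash_len;
};

static struct lsm_blob_sizes my_lsm_blob_sizes __ro_after_init = {
	.lbs_bdev = sizeof(struct my_lsm_bdev),
};

static inline struct my_lsm_bdev *my_lsm_bdev_blob(const struct block_device *bdev)
{
	/* lbs_bdev is updated to this LSM's offset at registration time */
	return bdev->bd_security + my_lsm_blob_sizes.lbs_bdev;
}

static int my_lsm_bdev_setintegrity(struct block_device *bdev,
				    enum lsm_integrity_type type,
				    const void *value, size_t size)
{
	struct my_lsm_bdev *blob = my_lsm_bdev_blob(bdev);
	const struct dm_verity_digest *d;

	if (type != LSM_INT_DMVERITY_ROOTHASH)
		return 0;	/* not a type this LSM cares about */

	d = value;
	if (d->digest_len > sizeof(blob->roothash))
		return -EINVAL;

	memcpy(blob->roothash, d->digest, d->digest_len);
	blob->roothash_len = d->digest_len;
	return 0;
}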
Signed-off-by: Deven Bowers Signed-off-by: Fan Wu Reviewed-by: Mikulas Patocka [PM: moved sig_size field as discussed] Signed-off-by: Paul Moore --- include/linux/security.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index d7cab2d5002f..e383022467db 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -83,8 +83,15 @@ enum lsm_event { LSM_POLICY_CHANGE, }; +struct dm_verity_digest { + const char *alg; + const u8 *digest; + size_t digest_len; +}; + enum lsm_integrity_type { - __LSM_INT_MAX + LSM_INT_DMVERITY_SIG_VALID, + LSM_INT_DMVERITY_ROOTHASH, }; /* -- cgit v1.2.3 From fb55e177d5936fb80fb2586036d195c57e7f6892 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Fri, 2 Aug 2024 23:08:28 -0700 Subject: lsm: add security_inode_setintegrity() hook This patch introduces a new hook to save inode's integrity data. For example, for fsverity enabled files, LSMs can use this hook to save the existence of verified fsverity builtin signature into the inode's security blob, and LSMs can make access decisions based on this data. Signed-off-by: Fan Wu [PM: subject line tweak, removed changelog] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 860821f3bf6f..1d59513bf230 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -180,6 +180,8 @@ LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src, const char *name) +LSM_HOOK(int, 0, inode_setintegrity, const struct inode *inode, + enum lsm_integrity_type type, const void *value, size_t size) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) diff --git a/include/linux/security.h b/include/linux/security.h index e383022467db..97b7c57e6560 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -410,6 +410,9 @@ int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer void security_inode_getsecid(struct inode *inode, u32 *secid); int security_inode_copy_up(struct dentry *src, struct cred **new); int security_inode_copy_up_xattr(struct dentry *src, const char *name); +int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, const void *value, + size_t size); int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn); int security_file_permission(struct file *file, int mask); @@ -1026,6 +1029,13 @@ static inline int security_inode_copy_up(struct dentry *src, struct cred **new) return 0; } +static inline int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + static inline int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn) { -- cgit v1.2.3 From 7c373e4f1445263728d3eeab7e33e932c8f4a288 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Fri, 2 Aug 2024 23:08:29 -0700 Subject: fsverity: expose verified fsverity built-in signatures to LSMs This patch enhances fsverity's capabilities to support both integrity and authenticity protection by introducing the 
exposure of built-in signatures through a new LSM hook. This functionality allows LSMs, e.g. IPE, to enforce policies based on the authenticity and integrity of files, specifically focusing on built-in fsverity signatures. It enables a policy enforcement layer within LSMs for fsverity, offering granular control over the usage of authenticity claims. For instance, a policy could be established to only permit the execution of all files with verified built-in fsverity signatures. The introduction of a security_inode_setintegrity() hook call within fsverity's workflow ensures that the verified built-in signature of a file is exposed to LSMs. This enables LSMs to recognize and label fsverity files that contain a verified built-in fsverity signature. This hook is invoked subsequent to the fsverity_verify_signature() process, guaranteeing the signature's verification against fsverity's keyring. This mechanism is crucial for maintaining system security, as it operates in kernel space, effectively thwarting attempts by malicious binaries to bypass user space stack interactions. The second to last commit in this patch set will add a link to the IPE documentation in fsverity.rst. Signed-off-by: Deven Bowers Signed-off-by: Fan Wu Acked-by: Eric Biggers Signed-off-by: Paul Moore --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 97b7c57e6560..c37c32ebbdcd 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -92,6 +92,7 @@ struct dm_verity_digest { enum lsm_integrity_type { LSM_INT_DMVERITY_SIG_VALID, LSM_INT_DMVERITY_ROOTHASH, + LSM_INT_FSVERITY_BUILTINSIG_VALID, }; /* -- cgit v1.2.3 From 7cff549daa67c7549c5a8688674cea2df8c2ec80 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 16 Aug 2024 17:43:05 +0200 Subject: kernel: Add helper macros for loop unrolling This helps in easily initializing blocks of code (e.g. static calls and keys). UNROLL(N, MACRO, __VA_ARGS__) calls MACRO N times with the first argument as the index of the iteration. This allows string pasting to create unique tokens for variable names, function calls etc. As an example: #include #define MACRO(N, a, b) \ int add_##N(int a, int b) \ { \ return a + b + N; \ } UNROLL(2, MACRO, x, y) expands to: int add_0(int x, int y) { return x + y + 0; } int add_1(int x, int y) { return x + y + 1; } Tested-by: Guenter Roeck Reviewed-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen Acked-by: Jiri Olsa Acked-by: Song Liu Acked-by: Andrii Nakryiko Acked-by: Casey Schaufler Nacked-by: Tetsuo Handa Signed-off-by: KP Singh Signed-off-by: Paul Moore --- include/linux/unroll.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/unroll.h (limited to 'include') diff --git a/include/linux/unroll.h b/include/linux/unroll.h new file mode 100644 index 000000000000..d42fd6366373 --- /dev/null +++ b/include/linux/unroll.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2023 Google LLC. + */ + +#ifndef __UNROLL_H +#define __UNROLL_H + +#include + +#define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args) + +#define __UNROLL_0(MACRO, args...) +#define __UNROLL_1(MACRO, args...) __UNROLL_0(MACRO, args) MACRO(0, args) +#define __UNROLL_2(MACRO, args...) __UNROLL_1(MACRO, args) MACRO(1, args) +#define __UNROLL_3(MACRO, args...) __UNROLL_2(MACRO, args) MACRO(2, args) +#define __UNROLL_4(MACRO, args...) 
__UNROLL_3(MACRO, args) MACRO(3, args) +#define __UNROLL_5(MACRO, args...) __UNROLL_4(MACRO, args) MACRO(4, args) +#define __UNROLL_6(MACRO, args...) __UNROLL_5(MACRO, args) MACRO(5, args) +#define __UNROLL_7(MACRO, args...) __UNROLL_6(MACRO, args) MACRO(6, args) +#define __UNROLL_8(MACRO, args...) __UNROLL_7(MACRO, args) MACRO(7, args) +#define __UNROLL_9(MACRO, args...) __UNROLL_8(MACRO, args) MACRO(8, args) +#define __UNROLL_10(MACRO, args...) __UNROLL_9(MACRO, args) MACRO(9, args) +#define __UNROLL_11(MACRO, args...) __UNROLL_10(MACRO, args) MACRO(10, args) +#define __UNROLL_12(MACRO, args...) __UNROLL_11(MACRO, args) MACRO(11, args) +#define __UNROLL_13(MACRO, args...) __UNROLL_12(MACRO, args) MACRO(12, args) +#define __UNROLL_14(MACRO, args...) __UNROLL_13(MACRO, args) MACRO(13, args) +#define __UNROLL_15(MACRO, args...) __UNROLL_14(MACRO, args) MACRO(14, args) +#define __UNROLL_16(MACRO, args...) __UNROLL_15(MACRO, args) MACRO(15, args) +#define __UNROLL_17(MACRO, args...) __UNROLL_16(MACRO, args) MACRO(16, args) +#define __UNROLL_18(MACRO, args...) __UNROLL_17(MACRO, args) MACRO(17, args) +#define __UNROLL_19(MACRO, args...) __UNROLL_18(MACRO, args) MACRO(18, args) +#define __UNROLL_20(MACRO, args...) __UNROLL_19(MACRO, args) MACRO(19, args) + +#endif /* __UNROLL_H */ -- cgit v1.2.3 From 9b59a85a84dc37ca4f2c54df5e06aff4c1eae5d3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 12:38:08 -0700 Subject: workqueue: Don't call va_start / va_end twice Calling va_start / va_end multiple times is undefined and causes problems with certain compiler / platforms. Change alloc_ordered_workqueue_lockdep_map to a macro and updated __alloc_workqueue to take a va_list argument. Cc: Sergey Senozhatsky Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Matthew Brost Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 04dff07a9e2b..f3cc15940b4d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -543,20 +543,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ -__printf(1, 4) static inline struct workqueue_struct * -alloc_ordered_workqueue_lockdep_map(const char *fmt, unsigned int flags, - struct lockdep_map *lockdep_map, ...) -{ - struct workqueue_struct *wq; - va_list args; - - va_start(args, lockdep_map); - wq = alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | flags, - 1, lockdep_map, args); - va_end(args); - - return wq; -} +#define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \ + alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, lockdep_map, ##args) #endif /** -- cgit v1.2.3 From 051c86afc342aed1f84d66ff5d09dc9e1c1685a1 Mon Sep 17 00:00:00 2001 From: Tejas Vipin Date: Sun, 18 Aug 2024 11:38:15 +0530 Subject: drm/mipi-dsi: Add mipi_dsi_dcs_set_tear_scanline_multi mipi_dsi_dcs_set_tear_scanline_multi can heavily benefit from being converted to a multi style function as it is often called in the context of similar functions. 
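For illustration, the multi-context pattern being converted to looks roughly like the panel-enable sketch below (assuming the other *_multi() helpers from the same conversion series are available; errors accumulate in the context instead of being checked per call):

#include <drm/drm_mipi_dsi.h>

/* Sketch of a panel enable path using accumulated-error helpers. */
static int example_panel_enable(struct mipi_dsi_device *dsi)
{
	struct mipi_dsi_multi_context ctx = { .dsi = dsi };

	mipi_dsi_dcs_exit_sleep_mode_multi(&ctx);
	mipi_dsi_dcs_set_tear_on_multi(&ctx, MIPI_DSI_DCS_TEAR_MODE_VBLANK);
	mipi_dsi_dcs_set_tear_scanline_multi(&ctx, 0);
	mipi_dsi_dcs_set_display_on_multi(&ctx);

	/* Each helper becomes a no-op once an error has been recorded. */
	return ctx.accum_err;
}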
Reviewed-by: Douglas Anderson Signed-off-by: Tejas Vipin Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240818060816.848784-2-tejasvipin76@gmail.com --- include/drm/drm_mipi_dsi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b78aae45cae7..f725f8654611 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -375,6 +375,8 @@ void mipi_dsi_dcs_set_column_address_multi(struct mipi_dsi_multi_context *ctx, u16 start, u16 end); void mipi_dsi_dcs_set_page_address_multi(struct mipi_dsi_multi_context *ctx, u16 start, u16 end); +void mipi_dsi_dcs_set_tear_scanline_multi(struct mipi_dsi_multi_context *ctx, + u16 scanline); /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet -- cgit v1.2.3 From 13cfd6a6d7ac78e845768278ab745acbd19c43ce Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 18 Aug 2024 14:43:51 +0300 Subject: net: Silence false field-spanning write warning in metadata_dst memcpy When metadata_dst struct is allocated (using metadata_dst_alloc()), it reserves room for options at the end of the struct. Change the memcpy() to unsafe_memcpy() as it is guaranteed that enough room (md_size bytes) was allocated and the field-spanning write is intentional. This resolves the following warning: ------------[ cut here ]------------ memcpy: detected field-spanning write (size 104) of single field "&new_md->u.tun_info" at include/net/dst_metadata.h:166 (size 96) WARNING: CPU: 2 PID: 391470 at include/net/dst_metadata.h:166 tun_dst_unclone+0x114/0x138 [geneve] Modules linked in: act_tunnel_key geneve ip6_udp_tunnel udp_tunnel act_vlan act_mirred act_skbedit cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress sbsa_gwdt ipmi_devintf ipmi_msghandler xfrm_interface xfrm6_tunnel tunnel6 tunnel4 xfrm_user xfrm_algo nvme_fabrics overlay optee openvswitch nsh nf_conncount ib_srp scsi_transport_srp rpcrdma rdma_ucm ib_iser rdma_cm ib_umad iw_cm libiscsi ib_ipoib scsi_transport_iscsi ib_cm uio_pdrv_genirq uio mlxbf_pmc pwr_mlxbf mlxbf_bootctl bluefield_edac nft_chain_nat binfmt_misc xt_MASQUERADE nf_nat xt_tcpmss xt_NFLOG nfnetlink_log xt_recent xt_hashlimit xt_state xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_mark xt_comment ipt_REJECT nf_reject_ipv4 nft_compat nf_tables nfnetlink sch_fq_codel dm_multipath fuse efi_pstore ip_tables btrfs blake2b_generic raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor xor_neon raid6_pq raid1 raid0 nvme nvme_core mlx5_ib ib_uverbs ib_core ipv6 crc_ccitt mlx5_core crct10dif_ce mlxfw psample i2c_mlxbf gpio_mlxbf2 mlxbf_gige mlxbf_tmfifo CPU: 2 PID: 391470 Comm: handler6 Not tainted 6.10.0-rc1 #1 Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS 4.5.0.12993 Dec 6 2023 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : tun_dst_unclone+0x114/0x138 [geneve] lr : tun_dst_unclone+0x114/0x138 [geneve] sp : ffffffc0804533f0 x29: ffffffc0804533f0 x28: 000000000000024e x27: 0000000000000000 x26: ffffffdcfc0e8e40 x25: ffffff8086fa6600 x24: ffffff8096a0c000 x23: 0000000000000068 x22: 0000000000000008 x21: ffffff8092ad7000 x20: ffffff8081e17900 x19: ffffff8092ad7900 x18: 00000000fffffffd x17: 0000000000000000 x16: ffffffdcfa018488 x15: 695f6e75742e753e x14: 2d646d5f77656e26 x13: 6d5f77656e262220 x12: 646c65696620656c x11: ffffffdcfbe33ae8 x10: ffffffdcfbe1baa8 x9 : ffffffdcfa0a4c10 x8 : 0000000000017fe8 x7 : 
c0000000ffffefff x6 : 0000000000000001 x5 : ffffff83fdeeb010 x4 : 0000000000000000 x3 : 0000000000000027 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffffff80913f6780 Call trace: tun_dst_unclone+0x114/0x138 [geneve] geneve_xmit+0x214/0x10e0 [geneve] dev_hard_start_xmit+0xc0/0x220 __dev_queue_xmit+0xa14/0xd38 dev_queue_xmit+0x14/0x28 [openvswitch] ovs_vport_send+0x98/0x1c8 [openvswitch] do_output+0x80/0x1a0 [openvswitch] do_execute_actions+0x172c/0x1958 [openvswitch] ovs_execute_actions+0x64/0x1a8 [openvswitch] ovs_packet_cmd_execute+0x258/0x2d8 [openvswitch] genl_family_rcv_msg_doit+0xc8/0x138 genl_rcv_msg+0x1ec/0x280 netlink_rcv_skb+0x64/0x150 genl_rcv+0x40/0x60 netlink_unicast+0x2e4/0x348 netlink_sendmsg+0x1b0/0x400 __sock_sendmsg+0x64/0xc0 ____sys_sendmsg+0x284/0x308 ___sys_sendmsg+0x88/0xf0 __sys_sendmsg+0x70/0xd8 __arm64_sys_sendmsg+0x2c/0x40 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x38/0x100 el0t_64_sync_handler+0xc0/0xc8 el0t_64_sync+0x1a4/0x1a8 ---[ end trace 0000000000000000 ]--- Reviewed-by: Cosmin Ratiu Reviewed-by: Tariq Toukan Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20240818114351.3612692-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/dst_metadata.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 4160731dcb6e..84c15402931c 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -163,8 +163,11 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) if (!new_md) return ERR_PTR(-ENOMEM); - memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, - sizeof(struct ip_tunnel_info) + md_size); + unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, + sizeof(struct ip_tunnel_info) + md_size, + /* metadata_dst_alloc() reserves room (md_size bytes) for + * options right after the ip_tunnel_info struct. 
+ */); #ifdef CONFIG_DST_CACHE /* Unclone the dst cache if there is one */ if (new_md->u.tun_info.dst_cache.cache) { -- cgit v1.2.3 From 8fb9f31984bdc0aaa1b0f7c7e7a27bd3137f8744 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Thu, 1 Aug 2024 09:33:51 +0800 Subject: f2fs: clean up val{>>,<<}F2FS_BLKSIZE_BITS Use F2FS_BYTES_TO_BLK(bytes) and F2FS_BLK_TO_BYTES(blk) for cleanup Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 01bee2b289c2..f17f89a259e2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,7 +19,6 @@ #define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ -#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ @@ -28,6 +27,7 @@ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) #define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) +#define F2FS_BLK_ALIGN(x) (F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1)) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 -- cgit v1.2.3 From 013f510d1fce563f37f420bf231b065885a16f21 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 1 Aug 2024 21:31:21 +0800 Subject: dm: Remove unused declaration dm_get_rq_mapinfo() Commit ae6ad75e5c3c ("dm: remove unused dm_get_rq_mapinfo()") removed the implementation but leave declaration. Signed-off-by: Yue Haibing Reviewed-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- include/linux/device-mapper.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 53ca3a913d06..8321f65897f3 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -524,7 +524,6 @@ int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors); void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); -union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED struct dm_report_zones_args { -- cgit v1.2.3 From 0e1d5731d3c1e2214249ef36dcd13ad51ad304cf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Aug 2024 08:35:30 +0206 Subject: printk: Check printk_deferred_enter()/_exit() usage Add validation that printk_deferred_enter()/_exit() are called in non-migration contexts. 
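For illustration, the usage pattern that this validation covers is roughly the following (a sketch; the raw spinlock stands in for whatever state makes direct console printing unsafe in the caller):

#include <linux/printk.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

/* Sketch: defer console output while holding a lock that the console
 * path could itself try to take. Interrupts stay disabled for the whole
 * deferred section, so the context cannot migrate. */
static void example_deferred_report(int err)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_lock, flags);
	printk_deferred_enter();
	pr_warn("example: deferred report, err=%d\n", err);
	printk_deferred_exit();
	raw_spin_unlock_irqrestore(&example_lock, flags);
}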
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-5-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index b937cefcb31c..eee8e97da681 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -161,15 +161,16 @@ int _printk(const char *fmt, ...); */ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); -extern void __printk_safe_enter(void); -extern void __printk_safe_exit(void); +extern void __printk_deferred_enter(void); +extern void __printk_deferred_exit(void); + /* * The printk_deferred_enter/exit macros are available only as a hack for * some code paths that need to defer all printk console printing. Interrupts * must be disabled for the deferred duration. */ -#define printk_deferred_enter __printk_safe_enter -#define printk_deferred_exit __printk_safe_exit +#define printk_deferred_enter() __printk_deferred_enter() +#define printk_deferred_exit() __printk_deferred_exit() /* * Please don't use printk_ratelimit(), because it shares ratelimiting state -- cgit v1.2.3 From b7049d88c1763d5f133b0e93e39da8e89a9f944b Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:32 +0206 Subject: printk: nbcon: Remove return value for write_atomic() The return value of write_atomic() does not provide any useful information. On the contrary, it makes things more complicated for the caller to appropriately deal with the information. Change write_atomic() to not have a return value. If the message did not get printed due to loss of ownership, the caller will notice this on its own. If ownership was not lost, it will be assumed that the driver successfully printed the message and the sequence number for that console will be incremented. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-7-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 31a8f5b85f5d..577b157fe4fb 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -345,7 +345,7 @@ struct console { struct hlist_node node; /* nbcon console specific members */ - bool (*write_atomic)(struct console *con, + void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; -- cgit v1.2.3 From 7d56936c1e5208b5eeea2a9a2f70e85248f6da49 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:33 +0206 Subject: printk: nbcon: Add detailed doc for write_atomic() The write_atomic() callback has special requirements and is allowed to use special helper functions. Provide detailed documentation of the callback so that a developer has a chance of implementing it correctly. 
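For illustration, a skeleton that follows the documented rules might look like this (a sketch for a hypothetical UART; it assumes the outbuf/len fields of struct nbcon_write_context and does not model real hardware):

#include <linux/console.h>

static void my_uart_write_atomic(struct console *con,
				 struct nbcon_write_context *wctxt)
{
	unsigned int i;

	/* The register accesses below are not safe against a takeover. */
	if (!nbcon_enter_unsafe(wctxt))
		return;	/* ownership lost: back out immediately */

	for (i = 0; i < wctxt->len; i++) {
		/* poll for TX ready, then write wctxt->outbuf[i] to the
		 * data register of the hypothetical UART */
	}

	nbcon_exit_unsafe(wctxt);
}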
Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-8-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 577b157fe4fb..35c64ee3827b 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -303,7 +303,7 @@ struct nbcon_write_context { /** * struct console - The console descriptor structure * @name: The name of the console driver - * @write: Write callback to output messages (Optional) + * @write: Legacy write callback to output messages (Optional) * @read: Read callback for console input (Optional) * @device: The underlying TTY device driver (Optional) * @unblank: Callback to unblank the console (Optional) @@ -320,7 +320,6 @@ struct nbcon_write_context { * @data: Driver private data * @node: hlist node for the console list * - * @write_atomic: Write callback for atomic context * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer @@ -345,8 +344,34 @@ struct console { struct hlist_node node; /* nbcon console specific members */ - void (*write_atomic)(struct console *con, - struct nbcon_write_context *wctxt); + + /** + * @write_atomic: + * + * NBCON callback to write out text in any context. (Optional) + * + * This callback is called with the console already acquired. However, + * a higher priority context is allowed to take it over by default. + * + * The callback must call nbcon_enter_unsafe() and nbcon_exit_unsafe() + * around any code where the takeover is not safe, for example, when + * manipulating the serial port registers. + * + * nbcon_enter_unsafe() will fail if the context has lost the console + * ownership in the meantime. In this case, the callback is no longer + * allowed to go forward. It must back out immediately and carefully. + * The buffer content is also no longer trusted since it no longer + * belongs to the context. + * + * The callback should allow the takeover whenever it is safe. It + * increases the chance to see messages when the system is in trouble. + * + * The callback can be called from any context (including NMI). + * Therefore it must avoid usage of any locking and instead rely + * on the console ownership for synchronization. + */ + void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; -- cgit v1.2.3 From 7a16a771890746b4060333d665ebe674945a3cfa Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:34 +0206 Subject: printk: nbcon: Add callbacks to synchronize with driver Console drivers typically must deal with access to the hardware via user input/output (such as an interactive login shell) and output of kernel messages via printk() calls. To provide the necessary synchronization, usually some driver-specific locking mechanism is used (for example, the port spinlock for uart serial consoles). Until now, usage of this driver-specific locking has been hidden from the printk-subsystem and implemented within the various console callbacks. However, nbcon consoles would need to use it even in the generic code. Add device_lock() and device_unlock() callback which will need to get implemented by nbcon consoles. 
The callbacks will use whatever synchronization mechanism the driver is using for itself. The minimum requirement is to prevent CPU migration. It would allow a context friendly acquiring of nbcon console ownership in non-emergency and non-panic context. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-9-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 35c64ee3827b..46b3c210b931 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -372,6 +372,49 @@ struct console { */ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + /** + * @device_lock: + * + * NBCON callback to begin synchronization with driver code. + * + * Console drivers typically must deal with access to the hardware + * via user input/output (such as an interactive login shell) and + * output of kernel messages via printk() calls. This callback is + * called by the printk-subsystem whenever it needs to synchronize + * with hardware access by the driver. It should be implemented to + * use whatever synchronization mechanism the driver is using for + * itself (for example, the port lock for uart serial consoles). + * + * The callback is always called from task context. It may use any + * synchronization method required by the driver. + * + * IMPORTANT: The callback MUST disable migration. The console driver + * may be using a synchronization mechanism that already takes + * care of this (such as spinlocks). Otherwise this function must + * explicitly call migrate_disable(). + * + * The flags argument is provided as a convenience to the driver. It + * will be passed again to device_unlock(). It can be ignored if the + * driver does not need it. + */ + void (*device_lock)(struct console *con, unsigned long *flags); + + /** + * @device_unlock: + * + * NBCON callback to finish synchronization with driver code. + * + * It is the counterpart to device_lock(). + * + * This callback is always called from task context. It must + * appropriately re-enable migration (depending on how device_lock() + * disabled migration). + * + * The flags argument is the value of the same variable that was + * passed to device_lock(). + */ + void (*device_unlock)(struct console *con, unsigned long flags); + atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; -- cgit v1.2.3 From 77e73c0687f5bc884fa1b0cb97aca912bcc01957 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:36 +0206 Subject: serial: core: Provide low-level functions to lock port It will be necessary at times for the uart nbcon console drivers to acquire the port lock directly (without the additional nbcon functionality of the port lock wrappers). These are special cases such as the implementation of the device_lock()/device_unlock() callbacks or for internal port lock wrapper synchronization. Provide low-level variants __uart_port_lock_irqsave() and __uart_port_unlock_irqrestore() for this purpose. 
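For illustration, a uart-based nbcon console could implement the device_lock()/device_unlock() callbacks from the previous patch on top of these low-level helpers roughly as follows (a sketch; my_ports and the index-based lookup are hypothetical driver details):

#include <linux/console.h>
#include <linux/serial_core.h>

static struct uart_port my_ports[1];

static void my_uart_console_device_lock(struct console *con, unsigned long *flags)
{
	/* The port lock disables migration, as the callback contract requires. */
	__uart_port_lock_irqsave(&my_ports[con->index], flags);
}

static void my_uart_console_device_unlock(struct console *con, unsigned long flags)
{
	__uart_port_unlock_irqrestore(&my_ports[con->index], flags);
}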
Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240820063001.36405-11-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index aea25eef9a1a..8872cd21e70a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -590,6 +590,24 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure -- cgit v1.2.3 From eabd4600dafacf9895184259373f2445ff748654 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:37 +0206 Subject: serial: core: Introduce wrapper to set @uart_port->cons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce uart_port_set_cons() as a wrapper to set @cons of a uart_port. The wrapper sets @cons under the port lock in order to prevent @cons from disappearing while another context is holding the port lock. This is necessary for a follow-up commit relating to the port lock wrappers, which rely on @cons not changing between lock and unlock. Signed-off-by: John Ogness Tested-by: Théo Lebrun # EyeQ5, AMBA-PL011 Acked-by: Greg Kroah-Hartman Reviewed-by: Petr Mladek Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240820063001.36405-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8872cd21e70a..2cf03ff2056a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -608,6 +608,23 @@ static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned spin_unlock_irqrestore(&up->lock, flags); } +/** + * uart_port_set_cons - Safely set the @cons field for a uart + * @up: The uart port to set + * @con: The new console to set to + * + * This function must be used to set @up->cons. It uses the port lock to + * synchronize with the port lock wrappers in order to ensure that the console + * cannot change or disappear while another context is holding the port lock. + */ +static inline void uart_port_set_cons(struct uart_port *up, struct console *con) +{ + unsigned long flags; + + __uart_port_lock_irqsave(up, &flags); + up->cons = con; + __uart_port_unlock_irqrestore(up, flags); +} /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure -- cgit v1.2.3 From dc219d8d858d95549543aa7a41b6e64a0da9e406 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:38 +0206 Subject: console: Improve console_srcu_read_flags() comments It was not clear when exactly console_srcu_read_flags() must be used vs. directly reading @console->flags. 
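For context, the pattern where the helper is required looks roughly like this illustrative sketch of walking the registered console list under SRCU (assuming the for_each_console_srcu() iterator):

#include <linux/console.h>

static void example_walk_consoles(void)
{
	struct console *con;
	int cookie;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		/* @con may be registered, so read its flags via the
		 * SRCU helper rather than directly. */
		short flags = console_srcu_read_flags(con);

		if (flags & CON_SUSPENDED)
			continue;
		/* ... use @con as a registered console ... */
	}
	console_srcu_read_unlock(cookie);
}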
Refactor and clarify that console_srcu_read_flags() is only needed if the console is registered or the caller is in a context where the registration status of the console may change (due to another context). The function requires the caller holds @console_srcu, which will ensure that the caller sees an appropriate @flags value for the registered console and that exit/cleanup routines will not run if the console is in the process of unregistration. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-13-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 46b3c210b931..aafe3121b74e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -446,28 +446,34 @@ extern void console_list_unlock(void) __releases(console_mutex); extern struct hlist_head console_list; /** - * console_srcu_read_flags - Locklessly read the console flags + * console_srcu_read_flags - Locklessly read flags of a possibly registered + * console * @con: struct console pointer of console to read flags from * - * This function provides the necessary READ_ONCE() and data_race() - * notation for locklessly reading the console flags. The READ_ONCE() - * in this function matches the WRITE_ONCE() when @flags are modified - * for registered consoles with console_srcu_write_flags(). + * Locklessly reading @con->flags provides a consistent read value because + * there is at most one CPU modifying @con->flags and that CPU is using only + * read-modify-write operations to do so. * - * Only use this function to read console flags when locklessly - * iterating the console list via srcu. + * Requires console_srcu_read_lock to be held, which implies that @con might + * be a registered console. The purpose of holding console_srcu_read_lock is + * to guarantee that the console state is valid (CON_SUSPENDED/CON_ENABLED) + * and that no exit/cleanup routines will run if the console is currently + * undergoing unregistration. + * + * If the caller is holding the console_list_lock or it is _certain_ that + * @con is not and will not become registered, the caller may read + * @con->flags directly instead. * * Context: Any context. + * Return: The current value of the @con->flags field. */ static inline short console_srcu_read_flags(const struct console *con) { WARN_ON_ONCE(!console_srcu_read_lock_is_held()); /* - * Locklessly reading console->flags provides a consistent - * read value because there is at most one CPU modifying - * console->flags and that CPU is using only read-modify-write - * operations to do so. + * The READ_ONCE() matches the WRITE_ONCE() when @flags are modified + * for registered consoles with console_srcu_write_flags(). */ return data_race(READ_ONCE(con->flags)); } -- cgit v1.2.3 From adf6f37d142e14896115929f3892bbc0a86e25bf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:39 +0206 Subject: nbcon: Add API to acquire context for non-printing operations Provide functions nbcon_device_try_acquire() and nbcon_device_release() which will try to acquire the nbcon console ownership with NBCON_PRIO_NORMAL and mark it unsafe for handover/takeover. These functions are to be used together with the device-specific locking when performing non-printing activities on the console device. 
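For illustration, a driver-side non-printing operation would wrap its hardware access roughly like this (a sketch; migration is assumed to already be disabled by the driver's own locking, as the API requires):

#include <linux/console.h>
#include <linux/printk.h>

static void example_reconfigure_console_hw(struct console *con)
{
	if (!nbcon_device_try_acquire(con))
		return;	/* owned by a higher-priority printing context */

	/* ... reprogram the console hardware; the context is marked
	 * unsafe, so write_atomic() cannot interleave here ... */

	nbcon_device_release(con);
}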
They will allow synchronization against the atomic_write() callback which will be serialized, for higher priority contexts, only by acquiring the console context ownership. Pitfalls: The API requires to be called in a context with migration disabled because it uses per-CPU variables internally. The context is set unsafe for a takeover all the time. It guarantees full serialization against any atomic_write() caller except for the final flush in panic() which might try an unsafe takeover. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-14-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 ++ include/linux/printk.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index aafe3121b74e..3706f944de46 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -322,6 +322,7 @@ struct nbcon_write_context { * * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print + * @nbcon_device_ctxt: Context available for non-printing operations * @pbufs: Pointer to nbcon private buffer */ struct console { @@ -417,6 +418,7 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; + struct nbcon_context __private nbcon_device_ctxt; struct printk_buffers *pbufs; }; diff --git a/include/linux/printk.h b/include/linux/printk.h index eee8e97da681..9687089f5ace 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -9,6 +9,8 @@ #include #include +struct console; + extern const char linux_banner[]; extern const char linux_proc_banner[]; @@ -198,6 +200,8 @@ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); +extern bool nbcon_device_try_acquire(struct console *con); +extern void nbcon_device_release(struct console *con); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -280,6 +284,16 @@ static inline void printk_trigger_flush(void) static inline void console_try_replay_all(void) { } + +static inline bool nbcon_device_try_acquire(struct console *con) +{ + return false; +} + +static inline void nbcon_device_release(struct console *con) +{ +} + #endif bool this_cpu_in_panic(void); -- cgit v1.2.3 From 4126f149c48b2ae757d11ea24675f7071ab22ebf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:40 +0206 Subject: serial: core: Acquire nbcon context in port->lock wrapper Currently the port->lock wrappers uart_port_lock(), uart_port_unlock() (and their variants) only lock/unlock the spin_lock. If the port is an nbcon console that has implemented the write_atomic() callback, the wrappers must also acquire/release the console context and mark the region as unsafe. This allows general port->lock synchronization to be synchronized against the nbcon write_atomic() callback. Note that __uart_port_using_nbcon() relies on the port->lock being held while a console is added and removed from the console list (i.e. all uart nbcon drivers *must* take the port->lock in their device_lock() callbacks). 
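For illustration, existing driver paths need no changes to benefit; an interrupt handler that already uses the wrappers is now transparently serialized against write_atomic() (a sketch with hypothetical my_uart names):

#include <linux/interrupt.h>
#include <linux/serial_core.h>

static irqreturn_t my_uart_irq(int irq, void *dev_id)
{
	struct uart_port *port = dev_id;
	unsigned long flags;

	/* Also acquires the nbcon context when the port is registered as
	 * an nbcon console with a write_atomic() callback. */
	uart_port_lock_irqsave(port, &flags);
	/* ... drain the RX FIFO, kick TX ... */
	uart_port_unlock_irqrestore(port, flags);

	return IRQ_HANDLED;
}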
Signed-off-by: John Ogness Acked-by: Greg Kroah-Hartman Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-15-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 82 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 2cf03ff2056a..4ab65874a850 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -625,6 +627,60 @@ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) up->cons = con; __uart_port_unlock_irqrestore(up, flags); } + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_using_nbcon(struct uart_port *up) +{ + lockdep_assert_held_once(&up->lock); + + if (likely(!uart_console(up))) + return false; + + /* + * @up->cons is only modified under the port lock. Therefore it is + * certain that it cannot disappear here. + * + * @up->cons->node is added/removed from the console list under the + * port lock. Therefore it is certain that the registration status + * cannot change here, thus @up->cons->flags can be read directly. + */ + if (hlist_unhashed_lockless(&up->cons->node) || + !(up->cons->flags & CON_NBCON) || + !up->cons->write_atomic) { + return false; + } + + return true; +} + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return true; + + return nbcon_device_try_acquire(up->cons); +} + +/* Only for internal port lock wrapper usage. */ +static inline void __uart_port_nbcon_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + while (!nbcon_device_try_acquire(up->cons)) + cpu_relax(); +} + +/* Only for internal port lock wrapper usage. 
*/ +static inline void __uart_port_nbcon_release(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + nbcon_device_release(up->cons); +} + /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure @@ -632,6 +688,7 @@ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) static inline void uart_port_lock(struct uart_port *up) { spin_lock(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -641,6 +698,7 @@ static inline void uart_port_lock(struct uart_port *up) static inline void uart_port_lock_irq(struct uart_port *up) { spin_lock_irq(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -651,6 +709,7 @@ static inline void uart_port_lock_irq(struct uart_port *up) static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); + __uart_port_nbcon_acquire(up); } /** @@ -661,7 +720,15 @@ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f */ static inline bool uart_port_trylock(struct uart_port *up) { - return spin_trylock(&up->lock); + if (!spin_trylock(&up->lock)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock(&up->lock); + return false; + } + + return true; } /** @@ -673,7 +740,15 @@ static inline bool uart_port_trylock(struct uart_port *up) */ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) { - return spin_trylock_irqsave(&up->lock, *flags); + if (!spin_trylock_irqsave(&up->lock, *flags)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock_irqrestore(&up->lock, *flags); + return false; + } + + return true; } /** @@ -682,6 +757,7 @@ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long */ static inline void uart_port_unlock(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock(&up->lock); } @@ -691,6 +767,7 @@ static inline void uart_port_unlock(struct uart_port *up) */ static inline void uart_port_unlock_irq(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock_irq(&up->lock); } @@ -701,6 +778,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up) */ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { + __uart_port_nbcon_release(up); spin_unlock_irqrestore(&up->lock, flags); } -- cgit v1.2.3 From 5dde3b7354133846079fa51e55e74ef90a836759 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:52 +0206 Subject: printk: nbcon: Add unsafe flushing on panic Add nbcon_atomic_flush_unsafe() to flush all nbcon consoles using the write_atomic() callback and allowing unsafe hostile takeovers. Call this at the end of panic() as a final attempt to flush any pending messages. Note that legacy consoles use unsafe methods for flushing from the beginning of panic (see bust_spinlocks()). Therefore, systems using both legacy and nbcon consoles may still fail to see panic messages due to unsafe legacy console usage. 
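As a rough illustration of the intended call site (the kernel/panic.c change itself is outside this include/-only hunk, so this is a simplified sketch rather than the actual hunk):

  /* Final step of the panic path, heavily simplified. */
  static void example_panic_final_flush(void)
  {
          /*
           * Legacy consoles have already done their unsafe flushing (see
           * bust_spinlocks()).  As a last resort, push out anything still
           * pending on nbcon consoles, allowing unsafe hostile takeovers
           * of console ownership since the system is dying anyway.
           */
          nbcon_atomic_flush_unsafe();
  }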
Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-27-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 9687089f5ace..2e083f01f8a3 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -202,6 +202,7 @@ void printk_trigger_flush(void); void console_try_replay_all(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); +void nbcon_atomic_flush_unsafe(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -294,6 +295,10 @@ static inline void nbcon_device_release(struct console *con) { } +static inline void nbcon_atomic_flush_unsafe(void) +{ +} + #endif bool this_cpu_in_panic(void); -- cgit v1.2.3 From e35a8884270bae11196eedf3b0a5bf22619f11f2 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:55 +0206 Subject: printk: Coordinate direct printing in panic If legacy and nbcon consoles are registered and the nbcon consoles are allowed to flush (i.e. no boot consoles registered), the legacy consoles will no longer perform direct printing on the panic CPU until after the backtrace has been stored. This will give the safe nbcon consoles a chance to print the panic messages before allowing the unsafe legacy consoles to print. If no nbcon consoles are registered or they are not allowed to flush because boot consoles are registered, there is no change in behavior (i.e. legacy consoles will always attempt to print from the printk() caller context). Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-30-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 2e083f01f8a3..eca9bb2ee637 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -200,6 +200,7 @@ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); +void printk_legacy_allow_panic_sync(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); void nbcon_atomic_flush_unsafe(void); @@ -286,6 +287,10 @@ static inline void console_try_replay_all(void) { } +static inline void printk_legacy_allow_panic_sync(void) +{ +} + static inline bool nbcon_device_try_acquire(struct console *con) { return false; -- cgit v1.2.3 From ecb5e1aa82c86642ec1eaafefd4e317dfba3a238 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Aug 2024 08:35:57 +0206 Subject: printk: nbcon: Implement emergency sections In emergency situations (something has gone wrong but the system continues to operate), usually important information (such as a backtrace) is generated via printk(). This information should be pushed out to the consoles ASAP. Add per-CPU emergency nesting tracking because an emergency can arise while in an emergency situation. Add functions to mark the beginning and end of emergency sections where the urgent messages are generated. Perform direct console flushing at the emergency priority if the current CPU is in an emergency state and it is safe to do so. Note that the emergency state is not system-wide. 
While one CPU is in an emergency state, another CPU may attempt to print console messages at normal priority. Also note that printk() already attempts to flush consoles in the caller context for normal priority. However, follow-up changes will introduce printing kthreads, in which case the normal priority printk() calls will offload to the kthreads. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-32-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 3706f944de46..9a13f91b0c43 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -553,10 +553,14 @@ static inline bool console_is_registered(const struct console *con) hlist_for_each_entry(con, &console_list, node) #ifdef CONFIG_PRINTK +extern void nbcon_cpu_emergency_enter(void); +extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); #else +static inline void nbcon_cpu_emergency_enter(void) { } +static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } -- cgit v1.2.3 From 3568affcddd68743e25aa3ec1647d9b82797757b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 20 Aug 2024 13:29:30 -0700 Subject: soc: qcom: pmic_glink: Fix race during initialization As pointed out by Stephen Boyd it is possible that during initialization of the pmic_glink child drivers, the protection-domain notifier fires, and the associated work is scheduled, before the client registration returns and, as a result, before the local "client" pointer has been initialized. The outcome of this is a NULL pointer dereference as the "client" pointer is blindly dereferenced. Timeline provided by Stephen: CPU0 CPU1 ---- ---- ucsi->client = NULL; devm_pmic_glink_register_client() client->pdr_notify(client->priv, pg->client_state) pmic_glink_ucsi_pdr_notify() schedule_work(&ucsi->register_work) pmic_glink_ucsi_register() ucsi_register() pmic_glink_ucsi_read_version() pmic_glink_ucsi_read() pmic_glink_ucsi_read() pmic_glink_send(ucsi->client) ucsi->client = client // Too late! This code is identical across the altmode, battery manager and ucsi child drivers. Resolve this by splitting the allocation of the "client" object and the registration thereof into two operations. This only happens if the protection domain registry is populated at the time of registration, which by the introduction of commit '1ebcde047c54 ("soc: qcom: add pd-mapper implementation")' became much more likely. 
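The resulting usage pattern in a child driver probe path looks roughly like the sketch below; the child driver names and owner id are made up, the error handling assumes an ERR_PTR-style return, and only devm_pmic_glink_client_alloc() and pmic_glink_client_register() come from this patch:

  struct example_child {
          struct pmic_glink_client *client;
  };

  static void example_cb(const void *data, size_t len, void *priv) { /* ... */ }
  static void example_pdr_notify(void *priv, int state) { /* ... */ }

  static int example_child_probe(struct auxiliary_device *adev,
                                 const struct auxiliary_device_id *id)
  {
          struct example_child *child;

          child = devm_kzalloc(&adev->dev, sizeof(*child), GFP_KERNEL);
          if (!child)
                  return -ENOMEM;

          /* Step 1: allocate the client; no pdr/cb callbacks can fire yet. */
          child->client = devm_pmic_glink_client_alloc(&adev->dev, EXAMPLE_OWNER_ID,
                                                       example_cb, example_pdr_notify,
                                                       child);
          if (IS_ERR(child->client))
                  return PTR_ERR(child->client);

          /* ... finish initializing everything the callbacks depend on ... */

          /* Step 2: only now let notifications and messages be delivered. */
          pmic_glink_client_register(child->client);

          return 0;
  }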
Reported-by: Amit Pundir Closes: https://lore.kernel.org/all/CAMi1Hd2_a7TjA7J9ShrAbNOd_CoZ3D87twmO5t+nZxC9sX18tA@mail.gmail.com/ Reported-by: Johan Hovold Closes: https://lore.kernel.org/all/ZqiyLvP0gkBnuekL@hovoldconsulting.com/ Reported-by: Stephen Boyd Closes: https://lore.kernel.org/all/CAE-0n52JgfCBWiFQyQWPji8cq_rCsviBpW-m72YitgNfdaEhQg@mail.gmail.com/ Fixes: 58ef4ece1e41 ("soc: qcom: pmic_glink: Introduce base PMIC GLINK driver") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Reviewed-by: Neil Armstrong Tested-by: Amit Pundir Reviewed-by: Johan Hovold Acked-by: Sebastian Reichel Tested-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240820-pmic-glink-v6-11-races-v3-1-eec53c750a04@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/pmic_glink.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/soc/qcom/pmic_glink.h b/include/linux/soc/qcom/pmic_glink.h index fd124aa18c81..7cddf1027752 100644 --- a/include/linux/soc/qcom/pmic_glink.h +++ b/include/linux/soc/qcom/pmic_glink.h @@ -23,10 +23,11 @@ struct pmic_glink_hdr { int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len); -struct pmic_glink_client *devm_pmic_glink_register_client(struct device *dev, - unsigned int id, - void (*cb)(const void *, size_t, void *), - void (*pdr)(void *, int), - void *priv); +struct pmic_glink_client *devm_pmic_glink_client_alloc(struct device *dev, + unsigned int id, + void (*cb)(const void *, size_t, void *), + void (*pdr)(void *, int), + void *priv); +void pmic_glink_client_register(struct pmic_glink_client *client); #endif -- cgit v1.2.3 From d156263e247c334a8872578118e0709ed63c4806 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Aug 2024 06:37:39 -1000 Subject: workqueue: Fix another htmldocs build warning Fix htmldocs build warning introduced by 9b59a85a84dc ("workqueue: Don't call va_start / va_end twice"). Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell Cc: Matthew Brost --- include/linux/workqueue.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f3cc15940b4d..59c2695e12e7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -534,7 +534,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @lockdep_map: user-defined lockdep_map - * @...: args for @fmt + * @args: args for @fmt * * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. * Useful for workqueues created with the same purpose and to avoid leaking a @@ -544,7 +544,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) 
\ - alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, lockdep_map, ##args) + alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), \ + 1, lockdep_map, ##args) #endif /** -- cgit v1.2.3 From 496ddd19a0fad22f250fc7a7b7a8000155418934 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 8 Aug 2024 16:22:28 -0700 Subject: bpf: extract iterator argument type and name validation logic Verifier enforces that all iterator structs are named `bpf_iter_` and that whenever iterator is passed to a kfunc it's passed as a valid PTR -> STRUCT chain (with potentially const modifiers in between). We'll need this check for upcoming changes, so instead of duplicating the logic, extract it into a helper function. Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240808232230.2848712-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index cffb43133c68..b8a583194c4a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -580,6 +580,7 @@ bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); +int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -654,6 +655,10 @@ static inline bool btf_types_are_same(const struct btf *btf1, u32 id1, { return false; } +static inline int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx) +{ + return -EOPNOTSUPP; +} #endif static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) -- cgit v1.2.3 From 5151fa35ae5979821d091b80096b4c790b187bac Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 16 Aug 2024 14:52:28 +0300 Subject: drm/fourcc: define Intel Xe2 related tile4 ccs modifiers Add Tile4 type ccs modifiers to indicate presence of compression on Xe2. Here is defined I915_FORMAT_MOD_4_TILED_LNL_CCS which is meant for integrated graphics with igpu related limitations Here is also defined I915_FORMAT_MOD_4_TILED_BMG_CCS which is meant for discrete graphics with dgpu related limitations Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Lucas De Marchi Acked-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240816115229.531671-3-juhapekka.heikkila@gmail.com Signed-off-by: Rodrigo Vivi --- include/uapi/drm/drm_fourcc.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 2d84a8052b15..78abd819fd62 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -702,6 +702,31 @@ extern "C" { */ #define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15) +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on integrated graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. 
The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. + */ +#define I915_FORMAT_MOD_4_TILED_LNL_CCS fourcc_mod_code(INTEL, 16) + +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on discrete graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. The GEM object must be stored in + * contiguous memory with a size aligned to 64KB + */ +#define I915_FORMAT_MOD_4_TILED_BMG_CCS fourcc_mod_code(INTEL, 17) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * -- cgit v1.2.3 From 74b1e94e94ea369923e5656eeb10b26b16591327 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 Aug 2024 13:51:19 -0700 Subject: net: repack struct netdev_queue Adding the NAPI pointer to struct netdev_queue made it grow into another cacheline, even though there was 44 bytes of padding available. The struct was historically grouped as follows: /* read-mostly stuff (align) */ /* ... random control path fields ... */ /* write-mostly stuff (align) */ /* ... 40 byte hole ... */ /* struct dql (align) */ It seems that people want to add control path fields after the read only fields. struct dql looks pretty innocent but it forces its own alignment and nothing indicates that there is a lot of empty space above it. Move dql above the xmit_lock. This shifts the empty space to the end of the struct rather than in the middle of it. Move two example fields there to set an example. Hopefully people will now add new fields at the end of the struct. A lot of the read-only stuff is also control path-only, but if we move it all we'll have another hole in the middle. 
Before: /* size: 384, cachelines: 6, members: 16 */ /* sum members: 284, holes: 3, sum holes: 100 */ After: /* size: 320, cachelines: 5, members: 16 */ /* sum members: 284, holes: 1, sum holes: 8 */ Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240820205119.1321322-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0ef3eaa23f4b..614ec5d3d75b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -644,9 +644,6 @@ struct netdev_queue { struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; -#endif -#if defined(CONFIG_XPS) && defined(CONFIG_NUMA) - int numa_node; #endif unsigned long tx_maxrate; /* @@ -660,13 +657,13 @@ struct netdev_queue { #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif - /* NAPI instance for the queue - * Readers and writers must hold RTNL - */ - struct napi_struct *napi; + /* * write-mostly part */ +#ifdef CONFIG_BQL + struct dql dql; +#endif spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; /* @@ -676,8 +673,16 @@ struct netdev_queue { unsigned long state; -#ifdef CONFIG_BQL - struct dql dql; +/* + * slow- / control-path part + */ + /* NAPI instance for the queue + * Readers and writers must hold RTNL + */ + struct napi_struct *napi; + +#if defined(CONFIG_XPS) && defined(CONFIG_NUMA) + int numa_node; #endif } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From ba0fb44aed47693cc2482427f63ba6cd19051327 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Sun, 11 Aug 2024 10:09:35 +0300 Subject: dma-mapping: replace zone_dma_bits by zone_dma_limit The hardware DMA limit might not be power of 2. When RAM range starts above 0, say 4GB, DMA limit of 30 bits should end at 5GB. A single high bit can not encode this limit. Use a plain address for the DMA zone limit instead. Since the DMA zone can now potentially span beyond 4GB physical limit of DMA32, make sure to use DMA zone for GFP_DMA32 allocations in that case. Signed-off-by: Catalin Marinas Co-developed-by: Baruch Siach Signed-off-by: Baruch Siach Reviewed-by: Catalin Marinas Reviewed-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index edbe13d00776..d7e30d4f7503 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -12,7 +12,7 @@ #include #include -extern unsigned int zone_dma_bits; +extern u64 zone_dma_limit; /* * Record the mapping of CPU physical to DMA addresses for a given region. -- cgit v1.2.3 From b5c58b2fdc427e7958412ecb2de2804a1f7c1572 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 24 Jul 2024 21:04:49 +0300 Subject: dma-mapping: direct calls for dma-iommu Directly call into dma-iommu just like we have been doing for dma-direct for a while. This avoids the indirect call overhead for IOMMU ops and removes the need to have DMA ops entirely for many common configurations. 
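The resulting dispatch in the dma-mapping core then looks roughly like the sketch below; the use_dma_iommu() helper name and the surrounding simplifications are assumptions here, only the dev->dma_iommu bit and iommu_dma_map_page() are taken from this series:

  #ifdef CONFIG_IOMMU_DMA
  static inline bool use_dma_iommu(struct device *dev)
  {
          return dev->dma_iommu;
  }
  #else
  static inline bool use_dma_iommu(struct device *dev)
  {
          return false;
  }
  #endif

  /* Inside a dma_map_page_attrs()-style wrapper, heavily simplified: */
  if (use_dma_iommu(dev))
          addr = iommu_dma_map_page(dev, page, offset, size, dir, attrs); /* direct call  */
  else
          addr = ops->map_page(dev, page, offset, size, dir, attrs);      /* indirect call */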
Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Acked-by: Greg Kroah-Hartman Acked-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/device.h | 5 ++ include/linux/dma-map-ops.h | 13 ---- include/linux/iommu-dma.h | 147 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 13 deletions(-) create mode 100644 include/linux/iommu-dma.h (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 34eb20f5966f..1c5280d28bc3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -707,6 +707,8 @@ struct device_physical_location { * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. + * @dma_iommu: Device is using default IOMMU implementation for DMA and + * doesn't rely on dma_ops structure. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -822,6 +824,9 @@ struct device { #ifdef CONFIG_DMA_NEED_SYNC bool dma_skip_sync:1; #endif +#ifdef CONFIG_IOMMU_DMA + bool dma_iommu:1; +#endif }; /** diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 02a1c825896b..077b15c93bb8 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -13,20 +13,7 @@ struct cma; struct iommu_ops; -/* - * Values for struct dma_map_ops.flags: - * - * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can - * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. - * DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is - * coherent and it's not an SWIOTLB buffer. - */ -#define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) -#define DMA_F_CAN_SKIP_SYNC (1 << 1) - struct dma_map_ops { - unsigned int flags; - void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h new file mode 100644 index 000000000000..d30a58bf00fd --- /dev/null +++ b/include/linux/iommu-dma.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved + * + * DMA operations that map physical memory through IOMMU. 
+ */ +#ifndef _LINUX_IOMMU_DMA_H +#define _LINUX_IOMMU_DMA_H + +#include + +#ifdef CONFIG_IOMMU_DMA +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs); +int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +unsigned long iommu_dma_get_merge_boundary(struct device *dev); +size_t iommu_dma_opt_mapping_size(void); +size_t iommu_dma_max_mapping_size(struct device *dev); +void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs); +dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); +void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); +void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +#else +static inline dma_addr_t iommu_dma_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_page(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return -EINVAL; +} +static inline void iommu_dma_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void *iommu_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return -EINVAL; +} +static inline int iommu_dma_get_sgtable(struct device *dev, + struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs) +{ + 
return -EINVAL; +} +static inline unsigned long iommu_dma_get_merge_boundary(struct device *dev) +{ + return 0; +} +static inline size_t iommu_dma_opt_mapping_size(void) +{ + return 0; +} +static inline size_t iommu_dma_max_mapping_size(struct device *dev) +{ + return 0; +} +static inline void iommu_dma_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, unsigned long attrs) +{ +} +static inline dma_addr_t iommu_dma_map_resource(struct device *dev, + phys_addr_t phys, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_resource(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline struct sg_table * +iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +#endif /* CONFIG_IOMMU_DMA */ +#endif /* _LINUX_IOMMU_DMA_H */ -- cgit v1.2.3 From 273f8c142003dfd874d45b1a60965809e95ccd50 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sat, 17 Aug 2024 15:18:18 +0200 Subject: net: ipv6: ioam6: new feature tunsrc This patch provides a new feature (i.e., "tunsrc") for the tunnel (i.e., "encap") mode of ioam6. Just like seg6 already does, except it is attached to a route. The "tunsrc" is optional: when not provided (by default), the automatic resolution is applied. Using "tunsrc" when possible has a benefit: performance. See the comparison: - before (= "encap" mode): https://ibb.co/bNCzvf7 - after (= "encap" mode with "tunsrc"): https://ibb.co/PT8L6yq Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni --- include/uapi/linux/ioam6_iptunnel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h index 38f6a8fdfd34..8aef21e4a8c1 100644 --- a/include/uapi/linux/ioam6_iptunnel.h +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -50,6 +50,12 @@ enum { IOAM6_IPTUNNEL_FREQ_K, /* u32 */ IOAM6_IPTUNNEL_FREQ_N, /* u32 */ + /* Tunnel src address. + * For encap,auto modes. + * Optional (automatic if not provided). + */ + IOAM6_IPTUNNEL_SRC, /* struct in6_addr */ + __IOAM6_IPTUNNEL_MAX, }; -- cgit v1.2.3 From a139c98f760efa1b6f0ee2f36ea6f62f04c8b20a Mon Sep 17 00:00:00 2001 From: Chris Wulff Date: Sat, 17 Aug 2024 10:28:51 -0400 Subject: USB: gadget: f_hid: Add GET_REPORT via userspace IOCTL While supporting GET_REPORT is a mandatory request per the HID specification the current implementation of the GET_REPORT request responds to the USB Host with an empty reply of the request length. However, some USB Hosts will request the contents of feature reports via the GET_REPORT request. 
In addition, some proprietary HID 'protocols' will expect different data, for the same report ID, to become available in the feature report by sending a preceding SET_REPORT to the USB Device that defines what data is to be presented when that feature report is subsequently retrieved via GET_REPORT (with a very fast < 5ms turn around between the SET_REPORT and the GET_REPORT). There are two other patch sets already submitted for adding GET_REPORT support. The first [1] allows for pre-priming a list of reports via IOCTLs which then allows the USB Host to perform the request, with no further userspace interaction possible during the GET_REPORT request. And another [2] which allows for a single report to be set up by userspace via IOCTL, which will be fetched and returned by the kernel for subsequent GET_REPORT requests by the USB Host, also with no further userspace interaction possible. This patch, while loosely based on both the patch sets, differs by allowing the option for userspace to respond to each GET_REPORT request by setting up a poll to notify userspace that a new GET_REPORT request has arrived. To support this, two extra IOCTLs are supplied. The first is used to retrieve the report ID of the GET_REPORT request (in the case of having non-zero report IDs in the HID descriptor). The second IOCTL allows for storing report responses in a list for responding to requests. The report responses are stored in a list (a response is either added if it does not exist or updated if it already exists). A flag (userspace_req) can be set to control whether subsequent requests notify userspace or not. Basic operation when a GET_REPORT request arrives from the USB Host: - If the report ID exists in the list and it is set for immediate return (i.e. userspace_req == false) then the response is sent immediately and userspace is not notified - If the report ID does not exist, or exists but is set to notify userspace (i.e. userspace_req == true), then notify userspace via poll: - If userspace responds, it either adds or updates the response in the list and the host is answered with the contents - If userspace does not respond within the fixed timeout (2500ms) but the report has been set previously, then send the 'old' report contents - If userspace does not respond within the fixed timeout (2500ms) and the report does not exist in the list then send an empty report Note that userspace could 'prime' the report list at any other time. While this patch allows for flexibility in how the system responds to requests, and therefore the HID 'protocols' that could be supported, a drawback is the time it takes to service the requests and therefore the maximum throughput that would be achievable. The USB HID Specification v1.11 itself states that GET_REPORT is not intended for periodic data polling, so this limitation is not severe. Testing on an iMX8M Nano Ultra Lite with a heavy multi-core CPU loading showed that userspace can typically respond to the GET_REPORT request within 1200ms - which is well within the 5000ms most operating systems seem to allow, and within the 2500ms set by this patch. 
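For illustration, a userspace handler built on the two new IOCTLs (defined in the uapi header added below) could look like the following sketch; error handling is trimmed and the poll event mask used to signal a pending GET_REPORT is an assumption here:

  #include <fcntl.h>
  #include <poll.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/usb/g_hid.h>

  static void serve_one_get_report(int fd)
  {
          struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
          struct usb_hidg_report rep;
          __u8 id = 0;

          poll(&pfd, 1, -1);                              /* wait for a request     */
          ioctl(fd, GADGET_HID_READ_GET_REPORT_ID, &id);  /* which report is asked? */

          memset(&rep, 0, sizeof(rep));
          rep.report_id = id;
          rep.userspace_req = 1;          /* keep notifying us on later requests */
          rep.length = 2;
          rep.data[0] = id;
          rep.data[1] = 0x42;             /* made-up payload */

          ioctl(fd, GADGET_HID_WRITE_GET_REPORT, &rep);
  }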
[1] https://lore.kernel.org/all/20220805070507.123151-2-sunil@amarulasolutions.com/ [2] https://lore.kernel.org/all/20220726005824.2817646-1-vi@endrift.com/ Signed-off-by: David Sands Signed-off-by: Chris Wulff Link: https://lore.kernel.org/r/20240817142850.1311460-2-crwulff@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/g_hid.h | 40 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/usb/gadgetfs.h | 2 +- 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/usb/g_hid.h (limited to 'include') diff --git a/include/uapi/linux/usb/g_hid.h b/include/uapi/linux/usb/g_hid.h new file mode 100644 index 000000000000..b965092db476 --- /dev/null +++ b/include/uapi/linux/usb/g_hid.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + +#ifndef __UAPI_LINUX_USB_G_HID_H +#define __UAPI_LINUX_USB_G_HID_H + +#include + +/* Maximum HID report length for High-Speed USB (i.e. USB 2.0) */ +#define MAX_REPORT_LENGTH 64 + +/** + * struct usb_hidg_report - response to GET_REPORT + * @report_id: report ID that this is a response for + * @userspace_req: + * !0 this report is used for any pending GET_REPORT request + * but wait on userspace to issue a new report on future requests + * 0 this report is to be used for any future GET_REPORT requests + * @length: length of the report response + * @data: report response + * @padding: padding for 32/64 bit compatibility + * + * Structure used by GADGET_HID_WRITE_GET_REPORT ioctl on /dev/hidg*. + */ +struct usb_hidg_report { + __u8 report_id; + __u8 userspace_req; + __u16 length; + __u8 data[MAX_REPORT_LENGTH]; + __u8 padding[4]; +}; + +/* The 'g' code is used by gadgetfs and hid gadget ioctl requests. + * Don't add any colliding codes to either driver, and keep + * them in unique ranges. + */ + +#define GADGET_HID_READ_GET_REPORT_ID _IOR('g', 0x41, __u8) +#define GADGET_HID_WRITE_GET_REPORT _IOW('g', 0x42, struct usb_hidg_report) + +#endif /* __UAPI_LINUX_USB_G_HID_H */ diff --git a/include/uapi/linux/usb/gadgetfs.h b/include/uapi/linux/usb/gadgetfs.h index 835473910a49..9754822b2a40 100644 --- a/include/uapi/linux/usb/gadgetfs.h +++ b/include/uapi/linux/usb/gadgetfs.h @@ -62,7 +62,7 @@ struct usb_gadgetfs_event { }; -/* The 'g' code is also used by printer gadget ioctl requests. +/* The 'g' code is also used by printer and hid gadget ioctl requests. * Don't add any colliding codes to either driver, and keep * them in unique ranges (size 0x20 for now). */ -- cgit v1.2.3 From 1a9e3b0af301413210319e6946fb4b0b1ad71ccc Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Thu, 22 Aug 2024 14:32:02 +0800 Subject: ASoC: tas2781: mark const variables tas2563_dvc_table as __maybe_unused In case of tas2781, tas2563_dvc_table will be unused, so mark it as __maybe_unused. 
Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240822063205.662-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2563-tlv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/tas2563-tlv.h b/include/sound/tas2563-tlv.h index faa3e194f73b..bb269b21f460 100644 --- a/include/sound/tas2563-tlv.h +++ b/include/sound/tas2563-tlv.h @@ -18,7 +18,7 @@ static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); /* pow(10, db/20) * pow(2,30) */ -static const unsigned char tas2563_dvc_table[][4] = { +static const __maybe_unused unsigned char tas2563_dvc_table[][4] = { { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ -- cgit v1.2.3 From fd69dfe6789f4ed46d1fdb52e223cff83946d997 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 21 Aug 2024 02:13:56 +0000 Subject: ASoC: soc-pcm: Indicate warning if dpcm_playback/capture were used for availability limitation I have been wondering why DPCM needs a special flag (= dpcm_playback/capture) to be used. Below is the history of why it was added to ASoC. (A) In the beginning, there was no dpcm_xxx flag in ASoC. It checked channels_min for DPCM, the same as current non-DPCM. Let's name it the "validation check" here. if (rtd->dai_link->dynamic || rtd->dai_link->no_pcm) { if (cpu_dai->driver->playback.channels_min) playback = 1; if (cpu_dai->driver->capture.channels_min) capture = 1; (B) commit 1e9de42f4324 ("ASoC: dpcm: Explicitly set BE DAI link supported stream directions") forced the use of the dpcm_xxx flags on DPCM. According to this commit log, this is because "Some BE dummy DAI doesn't set channels_min for playback/capture". But we don't know which DAI it is, nor why it can't/doesn't have channels_min. Let's name it "no_chan_DAI" here. According to the code and git-log, it is used as a DPCM-BE and is a CPU DAI. I think the correct solution would have been to set channels_min on the "no_chan_DAI" side, not to update the ASoC framework side. But everything is unclear today. if (rtd->dai_link->dynamic || rtd->dai_link->no_pcm) { playback = rtd->dai_link->dpcm_playback; capture = rtd->dai_link->dpcm_capture; (C) commit 9b5db059366a ("ASoC: soc-pcm: dpcm: Only allow playback/capture if supported") checks channels_min (= validation check) again. This is because DPCM availability was handled by the dpcm_xxx flags at that time, but some Sound Cards set them even though the stream wasn't available. Clearly there's a contradiction here. I think the correct solution would have been to update the Sound Card side instead of the ASoC framework. The Sound Card side was later updated to handle this issue (commit 25612477d20b ("ASoC: soc-dai: set dai_link dpcm_ flags with a helper")) if (rtd->dai_link->dynamic || rtd->dai_link->no_pcm) { ... playback = rtd->dai_link->dpcm_playback && snd_soc_dai_stream_valid(cpu_dai, ...); capture = rtd->dai_link->dpcm_capture && snd_soc_dai_stream_valid(cpu_dai, ...); This (C) patch should have broken "no_chan_DAI", which doesn't have channels_min, but there was no such report during these 4 years. The possible cases are as follows: - No one is using "no_chan_DAI" - "no_chan_DAI" no longer exists: it was removed? - "no_chan_DAI" no longer exists: it now has channels_min? Because of this history, the dpcm_xxx flags are unneeded today. But because we have been using them for 10 years since (B), they may have been used differently. 
For example, a DAI might be available for both playback and capture, but have only the dpcm_playback flag set; in that case the dpcm_xxx flag is being used as an availability limitation. We could use the playback_only flag instead in such a case, but it is very difficult to find every such DAI today. Let's add a grace period before removing the dpcm_playback/capture flags. This patch doesn't use the dpcm_xxx flags anymore, and indicates a warning suggesting the xxx_only flags if both playback and capture are available but only one of the dpcm_xxx flags is set and no xxx_only flag is used. Link: https://lore.kernel.org/r/87edaym2cg.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Kuninori Morimoto Tested-by: Jerome Brunet Link: https://patch.msgid.link/87seuyaahn.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index e844f6afc5b5..81ef380b2e06 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -815,6 +815,7 @@ struct snd_soc_dai_link { /* This DAI link can route to other DAI links at runtime (Frontend)*/ unsigned int dynamic:1; + /* REMOVE ME */ /* DPCM capture and Playback support */ unsigned int dpcm_capture:1; unsigned int dpcm_playback:1; -- cgit v1.2.3 From 12806510481497a01d01edd64d7bb53a4d9ec28d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 21 Aug 2024 02:14:02 +0000 Subject: ASoC: remove snd_soc_dai_link_set_capabilities() The dpcm_xxx flags are no longer needed. We would need to use the xxx_only flags instead where applicable, but snd_soc_dai_link_set_capabilities() sets dpcm_xxx for its users only when playback/capture is actually available, so converting dpcm_xxx to xxx_only is not needed. Just remove the helper. Signed-off-by: Kuninori Morimoto Tested-by: Jerome Brunet Link: https://patch.msgid.link/87r0aiaahh.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index ab4e109fe71d..0d1b215f24f4 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -219,7 +219,6 @@ void snd_soc_dai_resume(struct snd_soc_dai *dai); int snd_soc_dai_compress_new(struct snd_soc_dai *dai, struct snd_soc_pcm_runtime *rtd, int num); bool snd_soc_dai_stream_valid(const struct snd_soc_dai *dai, int stream); -void snd_soc_dai_link_set_capabilities(struct snd_soc_dai_link *dai_link); void snd_soc_dai_action(struct snd_soc_dai *dai, int stream, int action); static inline void snd_soc_dai_activate(struct snd_soc_dai *dai, -- cgit v1.2.3 From 90dc34da02aca2fe529ae1ed1822e8fc2a0d9ebc Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 22 Aug 2024 15:55:35 +0100 Subject: ASoC: cs35l56: Make struct regmap_config const It's now possible to declare instances of struct regmap_config as const data. 
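For example, a driver-side declaration can now simply be (the field values here are illustrative, not the cs35l56 configuration):

  static const struct regmap_config example_codec_regmap = {
          .reg_bits = 32,
          .val_bits = 32,
          .cache_type = REGCACHE_MAPLE,
  };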
Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240822145535.336407-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index a51acefa785f..94e8185c4795 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -282,9 +282,9 @@ static inline bool cs35l56_is_otp_register(unsigned int reg) return (reg >> 16) == 3; } -extern struct regmap_config cs35l56_regmap_i2c; -extern struct regmap_config cs35l56_regmap_spi; -extern struct regmap_config cs35l56_regmap_sdw; +extern const struct regmap_config cs35l56_regmap_i2c; +extern const struct regmap_config cs35l56_regmap_spi; +extern const struct regmap_config cs35l56_regmap_sdw; extern const struct cirrus_amp_cal_controls cs35l56_calibration_controls; -- cgit v1.2.3 From ae010757a55b57c8b82628e8ea9b7da2269131d9 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 22 Aug 2024 01:41:07 -0700 Subject: bpf: rename nocsr -> bpf_fastcall in verifier Attribute used by LLVM implementation of the feature had been changed from no_caller_saved_registers to bpf_fastcall (see [1]). This commit replaces references to nocsr by references to bpf_fastcall to keep LLVM and Kernel parts in sync. [1] https://github.com/llvm/llvm-project/pull/105417 Acked-by: Yonghong Song Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240822084112.3257995-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 +++--- include/linux/bpf_verifier.h | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f0192c173ed8..00dc4dd28cbd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -808,12 +808,12 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; bool might_sleep; - /* set to true if helper follows contract for gcc/llvm - * attribute no_caller_saved_registers: + /* set to true if helper follows contract for llvm + * attribute bpf_fastcall: * - void functions do not scratch r0 * - functions taking N arguments scratch only registers r1-rN */ - bool allow_nocsr; + bool allow_fastcall; enum bpf_return_type ret_type; union { struct { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5cea15c81b8a..634a302a39e3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -577,13 +577,13 @@ struct bpf_insn_aux_data { bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ /* true if STX or LDX instruction is a part of a spill/fill - * pattern for a no_caller_saved_registers call. + * pattern for a bpf_fastcall call. */ - u8 nocsr_pattern:1; + u8 fastcall_pattern:1; /* for CALL instructions, a number of spill/fill pairs in the - * no_caller_saved_registers pattern. + * bpf_fastcall pattern. */ - u8 nocsr_spills_num:3; + u8 fastcall_spills_num:3; /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ @@ -653,10 +653,10 @@ struct bpf_subprog_info { u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ u16 stack_extra; - /* offsets in range [stack_depth .. nocsr_stack_off) - * are used for no_caller_saved_registers spills and fills. + /* offsets in range [stack_depth .. 
fastcall_stack_off) + * are used for bpf_fastcall spills and fills. */ - s16 nocsr_stack_off; + s16 fastcall_stack_off; bool has_tail_call: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; @@ -664,8 +664,8 @@ struct bpf_subprog_info { bool is_async_cb: 1; bool is_exception_cb: 1; bool args_cached: 1; - /* true if nocsr stack region is used by functions that can't be inlined */ - bool keep_nocsr_stack: 1; + /* true if bpf_fastcall stack region is used by functions that can't be inlined */ + bool keep_fastcall_stack: 1; u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; -- cgit v1.2.3 From aa35e56a5217b86f9c05420c36c908baf3b2df5f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Aug 2024 18:00:19 +0200 Subject: thermal: core: Introduce .should_bind() thermal zone callback The current design of the code binding cooling devices to trip points in thermal zones is convoluted and hard to follow. Namely, a driver that registers a thermal zone can provide .bind() and .unbind() operations for it, which are required to call either thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip(), respectively, or thermal_zone_bind_cooling_device() and thermal_zone_unbind_cooling_device(), respectively, for every relevant trip point and the given cooling device. Moreover, if .bind() is provided and .unbind() is not, the cleanup necessary during the removal of a thermal zone or a cooling device may not be carried out. In other words, the core relies on the thermal zone owners to do the right thing, which is error prone and far from obvious, even though all of that is not really necessary. Specifically, if the core could ask the thermal zone owner, through a special thermal zone callback, whether or not a given cooling device should be bound to a given trip point in the given thermal zone, it might as well carry out all of the binding and unbinding by itself. In particular, the unbinding can be done automatically without involving the thermal zone owner at all because all of the thermal instances associated with a thermal zone or cooling device going away must be deleted regardless. Accordingly, introduce a new thermal zone operation, .should_bind(), that can be invoked by the thermal core for a given thermal zone, trip point and cooling device combination in order to check whether or not the cooling device should be bound to the trip point at hand. It takes an additional cooling_spec argument allowing the thermal zone owner to specify the highest and lowest cooling states of the cooling device and its weight for the given trip point binding. Make the thermal core use this operation, if present, in the absence of .bind() and .unbind(). Note that .should_bind() will be called under the thermal zone lock. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Zhang Rui Acked-by: Huisong Li Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/9334403.CDJkKcVGEf@rjwysocki.net --- include/linux/thermal.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index ba6d74c11620..9c5d1f14224f 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -85,11 +85,21 @@ struct thermal_trip { struct thermal_zone_device; +struct cooling_spec { + unsigned long upper; /* Highest cooling state */ + unsigned long lower; /* Lowest cooling state */ + unsigned int weight; /* Cooling device weight */ +}; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); + bool (*should_bind) (struct thermal_zone_device *, + const struct thermal_trip *, + struct thermal_cooling_device *, + struct cooling_spec *); int (*get_temp) (struct thermal_zone_device *, int *); int (*set_trips) (struct thermal_zone_device *, int, int); int (*change_mode) (struct thermal_zone_device *, -- cgit v1.2.3 From 2fb85633641770d55b50c934db6b4fb02411a1fe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Aug 2024 18:05:00 +0200 Subject: thermal: core: Unexport thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip() Since thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip() are only called locally in the thermal core now, they can be static, so change their definitions accordingly and drop their headers from the global thermal header file. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Acked-by: Huisong Li Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/3512161.QJadu78ljV@rjwysocki.net --- include/linux/thermal.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 9c5d1f14224f..6599a26847f7 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -247,18 +247,10 @@ const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); -int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz, - const struct thermal_trip *trip, - struct thermal_cooling_device *cdev, - unsigned long upper, unsigned long lower, - unsigned int weight); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, unsigned int); -int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, - const struct thermal_trip *trip, - struct thermal_cooling_device *cdev); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, -- cgit v1.2.3 From d51e783c17bab0c139bf78d6bd9d1f66673f7903 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 16 Aug 2024 17:43:06 +0200 Subject: lsm: count the LSMs enabled at compile time These macros are a clever trick to determine a count of the number of LSMs that are enabled in the config to ascertain the maximum number of static calls that need to be configured per LSM hook. 
Without this one would need to generate static calls for the total number of LSMs in the kernel (even if they are not compiled) times the number of LSM hooks which ends up being quite wasteful. Tested-by: Guenter Roeck Suggested-by: Kui-Feng Lee Suggested-by: Andrii Nakryiko Reviewed-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen Acked-by: Casey Schaufler Acked-by: Song Liu Acked-by: Andrii Nakryiko Nacked-by: Tetsuo Handa Signed-off-by: KP Singh [PM: added IPE to the count during merge] Signed-off-by: Paul Moore --- include/linux/args.h | 6 +-- include/linux/lsm_count.h | 135 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 include/linux/lsm_count.h (limited to 'include') diff --git a/include/linux/args.h b/include/linux/args.h index 8ff60a54eb7d..2e8e65d975c7 100644 --- a/include/linux/args.h +++ b/include/linux/args.h @@ -17,9 +17,9 @@ * that as _n. */ -/* This counts to 12. Any more, it will return 13th argument. */ -#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) /* Concatenate two parameters, but allow them to be expanded beforehand. */ #define __CONCAT(a, b) a ## b diff --git a/include/linux/lsm_count.h b/include/linux/lsm_count.h new file mode 100644 index 000000000000..16eb49761b25 --- /dev/null +++ b/include/linux/lsm_count.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2023 Google LLC. + */ + +#ifndef __LINUX_LSM_COUNT_H +#define __LINUX_LSM_COUNT_H + +#include + +#ifdef CONFIG_SECURITY + +/* + * Macros to count the number of LSMs enabled in the kernel at compile time. + */ + +/* + * Capabilities is enabled when CONFIG_SECURITY is enabled. 
+ */ +#if IS_ENABLED(CONFIG_SECURITY) +#define CAPABILITIES_ENABLED 1, +#else +#define CAPABILITIES_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_SELINUX) +#define SELINUX_ENABLED 1, +#else +#define SELINUX_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_SMACK) +#define SMACK_ENABLED 1, +#else +#define SMACK_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_APPARMOR) +#define APPARMOR_ENABLED 1, +#else +#define APPARMOR_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_TOMOYO) +#define TOMOYO_ENABLED 1, +#else +#define TOMOYO_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_YAMA) +#define YAMA_ENABLED 1, +#else +#define YAMA_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_LOADPIN) +#define LOADPIN_ENABLED 1, +#else +#define LOADPIN_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_LOCKDOWN_LSM) +#define LOCKDOWN_ENABLED 1, +#else +#define LOCKDOWN_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_SAFESETID) +#define SAFESETID_ENABLED 1, +#else +#define SAFESETID_ENABLED +#endif + +#if IS_ENABLED(CONFIG_BPF_LSM) +#define BPF_LSM_ENABLED 1, +#else +#define BPF_LSM_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_LANDLOCK) +#define LANDLOCK_ENABLED 1, +#else +#define LANDLOCK_ENABLED +#endif + +#if IS_ENABLED(CONFIG_IMA) +#define IMA_ENABLED 1, +#else +#define IMA_ENABLED +#endif + +#if IS_ENABLED(CONFIG_EVM) +#define EVM_ENABLED 1, +#else +#define EVM_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_IPE) +#define IPE_ENABLED 1, +#else +#define IPE_ENABLED +#endif + +/* + * There is a trailing comma that we need to be accounted for. This is done by + * using a skipped argument in __COUNT_LSMS + */ +#define __COUNT_LSMS(skipped_arg, args...) COUNT_ARGS(args...) +#define COUNT_LSMS(args...) __COUNT_LSMS(args) + +#define MAX_LSM_COUNT \ + COUNT_LSMS( \ + CAPABILITIES_ENABLED \ + SELINUX_ENABLED \ + SMACK_ENABLED \ + APPARMOR_ENABLED \ + TOMOYO_ENABLED \ + YAMA_ENABLED \ + LOADPIN_ENABLED \ + LOCKDOWN_ENABLED \ + SAFESETID_ENABLED \ + BPF_LSM_ENABLED \ + LANDLOCK_ENABLED \ + IMA_ENABLED \ + EVM_ENABLED \ + IPE_ENABLED) + +#else + +#define MAX_LSM_COUNT 0 + +#endif /* CONFIG_SECURITY */ + +#endif /* __LINUX_LSM_COUNT_H */ -- cgit v1.2.3 From 417c5643cd67a55f424b203b492082035d0236c3 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 16 Aug 2024 17:43:07 +0200 Subject: lsm: replace indirect LSM hook calls with static calls LSM hooks are currently invoked from a linked list as indirect calls which are invoked using retpolines as a mitigation for speculative attacks (Branch History / Target injection) and add extra overhead which is especially bad in kernel hot paths: security_file_ioctl: 0xff...0320 <+0>: endbr64 0xff...0324 <+4>: push %rbp 0xff...0325 <+5>: push %r15 0xff...0327 <+7>: push %r14 0xff...0329 <+9>: push %rbx 0xff...032a <+10>: mov %rdx,%rbx 0xff...032d <+13>: mov %esi,%ebp 0xff...032f <+15>: mov %rdi,%r14 0xff...0332 <+18>: mov $0xff...7030,%r15 0xff...0339 <+25>: mov (%r15),%r15 0xff...033c <+28>: test %r15,%r15 0xff...033f <+31>: je 0xff...0358 0xff...0341 <+33>: mov 0x18(%r15),%r11 0xff...0345 <+37>: mov %r14,%rdi 0xff...0348 <+40>: mov %ebp,%esi 0xff...034a <+42>: mov %rbx,%rdx 0xff...034d <+45>: call 0xff...2e0 <__x86_indirect_thunk_array+352> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Indirect calls that use retpolines leading to overhead, not just due to extra instruction but also branch misses. 
0xff...0352 <+50>: test %eax,%eax 0xff...0354 <+52>: je 0xff...0339 0xff...0356 <+54>: jmp 0xff...035a 0xff...0358 <+56>: xor %eax,%eax 0xff...035a <+58>: pop %rbx 0xff...035b <+59>: pop %r14 0xff...035d <+61>: pop %r15 0xff...035f <+63>: pop %rbp 0xff...0360 <+64>: jmp 0xff...47c4 <__x86_return_thunk> The indirect calls are not really needed as one knows the addresses of enabled LSM callbacks at boot time and only the order can possibly change at boot time with the lsm= kernel command line parameter. An array of static calls is defined per LSM hook and the static calls are updated at boot time once the order has been determined. With the hook now exposed as a static call, one can see that the retpolines are no longer there and the LSM callbacks are invoked directly: security_file_ioctl: 0xff...0ca0 <+0>: endbr64 0xff...0ca4 <+4>: nopl 0x0(%rax,%rax,1) 0xff...0ca9 <+9>: push %rbp 0xff...0caa <+10>: push %r14 0xff...0cac <+12>: push %rbx 0xff...0cad <+13>: mov %rdx,%rbx 0xff...0cb0 <+16>: mov %esi,%ebp 0xff...0cb2 <+18>: mov %rdi,%r14 0xff...0cb5 <+21>: jmp 0xff...0cc7 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Static key enabled for SELinux 0xffffffff818f0cb7 <+23>: jmp 0xff...0cde ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Static key enabled for BPF LSM. This is something that is changed to default to false to avoid the existing side effect issues of BPF LSM [1] in a subsequent patch. 0xff...0cb9 <+25>: xor %eax,%eax 0xff...0cbb <+27>: xchg %ax,%ax 0xff...0cbd <+29>: pop %rbx 0xff...0cbe <+30>: pop %r14 0xff...0cc0 <+32>: pop %rbp 0xff...0cc1 <+33>: cs jmp 0xff...0000 <__x86_return_thunk> 0xff...0cc7 <+39>: endbr64 0xff...0ccb <+43>: mov %r14,%rdi 0xff...0cce <+46>: mov %ebp,%esi 0xff...0cd0 <+48>: mov %rbx,%rdx 0xff...0cd3 <+51>: call 0xff...3230 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Direct call to SELinux. 0xff...0cd8 <+56>: test %eax,%eax 0xff...0cda <+58>: jne 0xff...0cbd 0xff...0cdc <+60>: jmp 0xff...0cb7 0xff...0cde <+62>: endbr64 0xff...0ce2 <+66>: mov %r14,%rdi 0xff...0ce5 <+69>: mov %ebp,%esi 0xff...0ce7 <+71>: mov %rbx,%rdx 0xff...0cea <+74>: call 0xff...e220 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Direct call to BPF LSM. 0xff...0cef <+79>: test %eax,%eax 0xff...0cf1 <+81>: jne 0xff...0cbd 0xff...0cf3 <+83>: jmp 0xff...0cb9 0xff...0cf5 <+85>: endbr64 0xff...0cf9 <+89>: mov %r14,%rdi 0xff...0cfc <+92>: mov %ebp,%esi 0xff...0cfe <+94>: mov %rbx,%rdx 0xff...0d01 <+97>: pop %rbx 0xff...0d02 <+98>: pop %r14 0xff...0d04 <+100>: pop %rbp 0xff...0d05 <+101>: ret 0xff...0d06 <+102>: int3 0xff...0d07 <+103>: int3 0xff...0d08 <+104>: int3 0xff...0d09 <+105>: int3 While this patch uses static_branch_unlikely indicating that an LSM hook is likely to be not present. In most cases this is still a better choice as even when an LSM with one hook is added, empty slots are created for all LSM hooks (especially when many LSMs that do not initialize most hooks are present on the system). There are some hooks that don't use the call_int_hook or call_void_hook. These hooks are updated to use a new macro called lsm_for_each_hook where the lsm_callback is directly invoked as an indirect call. Below are results of the relevant Unixbench system benchmarks with BPF LSM and SELinux enabled with default policies enabled with and without these patches. 
Benchmark Delta(%): (+ is better) ========================================================================== Execl Throughput +1.9356 File Write 1024 bufsize 2000 maxblocks +6.5953 Pipe Throughput +9.5499 Pipe-based Context Switching +3.0209 Process Creation +2.3246 Shell Scripts (1 concurrent) +1.4975 System Call Overhead +2.7815 System Benchmarks Index Score (Partial Only): +3.4859 In the best case, some syscalls like eventfd_create benefitted to about ~10%. Tested-by: Guenter Roeck Reviewed-by: Casey Schaufler Reviewed-by: Kees Cook Acked-by: Song Liu Acked-by: Andrii Nakryiko Signed-off-by: KP Singh Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 52 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 4687985b9175..090d1d3e19fe 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -30,19 +30,47 @@ #include #include #include +#include +#include +#include +#include union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); #include "lsm_hook_defs.h" #undef LSM_HOOK + void *lsm_func_addr; }; -struct security_hook_heads { - #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; - #include "lsm_hook_defs.h" - #undef LSM_HOOK +/* + * @key: static call key as defined by STATIC_CALL_KEY + * @trampoline: static call trampoline as defined by STATIC_CALL_TRAMP + * @hl: The security_hook_list as initialized by the owning LSM. + * @active: Enabled when the static call has an LSM hook associated. + */ +struct lsm_static_call { + struct static_call_key *key; + void *trampoline; + struct security_hook_list *hl; + /* this needs to be true or false based on what the key defaults to */ + struct static_key_false *active; } __randomize_layout; +/* + * Table of the static calls for each LSM hook. + * Once the LSMs are initialized, their callbacks will be copied to these + * tables such that the calls are filled backwards (from last to first). + * This way, we can jump directly to the first used static call, and execute + * all of them after. This essentially makes the entry point + * dynamic to adapt the number of static calls to the number of callbacks. + */ +struct lsm_static_calls_table { + #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + struct lsm_static_call NAME[MAX_LSM_COUNT]; + #include + #undef LSM_HOOK +} __packed __randomize_layout; + /** * struct lsm_id - Identify a Linux Security Module. * @lsm: name of the LSM, must be approved by the LSM maintainers @@ -58,10 +86,14 @@ struct lsm_id { /* * Security module hook list structure. * For use with generic list macros for common operations. + * + * struct security_hook_list - Contents of a cacheable, mappable object. + * @scalls: The beginning of the array of static calls assigned to this hook. + * @hook: The callback for the hook. + * @lsm: The name of the lsm that owns this hook. */ struct security_hook_list { - struct hlist_node list; - struct hlist_head *head; + struct lsm_static_call *scalls; union security_list_options hook; const struct lsm_id *lsmid; } __randomize_layout; @@ -98,8 +130,11 @@ struct lsm_blob_sizes { * care of the common case and reduces the amount of * text involved. 
*/ -#define LSM_HOOK_INIT(HEAD, HOOK) \ - { .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } } +#define LSM_HOOK_INIT(NAME, HOOK) \ + { \ + .scalls = static_calls_table.NAME, \ + .hook = { .NAME = HOOK } \ + } extern void security_add_hooks(struct security_hook_list *hooks, int count, const struct lsm_id *lsmid); @@ -134,7 +169,6 @@ struct lsm_info { /* DO NOT tamper with these variables outside of the LSM framework */ extern char *lsm_names; -extern struct security_hook_heads security_hook_heads; extern struct lsm_static_calls_table static_calls_table __ro_after_init; extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; -- cgit v1.2.3 From 6c65914a40d33e96ceedc757be0129139cdf7852 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 16 Aug 2024 11:54:23 -0700 Subject: Input: keypad-nomadik-ske - remove the driver The users of this driver were removed in 2013 in commit 28633c54bda6 ("ARM: ux500: Rip out keypad initialisation which is no longer used"). Remove the driver as well. Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/Zr-gX0dfN4te_8VG@google.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/keypad-nomadik-ske.h | 50 ------------------------ 1 file changed, 50 deletions(-) delete mode 100644 include/linux/platform_data/keypad-nomadik-ske.h (limited to 'include') diff --git a/include/linux/platform_data/keypad-nomadik-ske.h b/include/linux/platform_data/keypad-nomadik-ske.h deleted file mode 100644 index 7efabbca1dca..000000000000 --- a/include/linux/platform_data/keypad-nomadik-ske.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * - * Author: Naveen Kumar Gaddipati - * - * ux500 Scroll key and Keypad Encoder (SKE) header - */ - -#ifndef __SKE_H -#define __SKE_H - -#include - -/* register definitions for SKE peripheral */ -#define SKE_CR 0x00 -#define SKE_VAL0 0x04 -#define SKE_VAL1 0x08 -#define SKE_DBCR 0x0C -#define SKE_IMSC 0x10 -#define SKE_RIS 0x14 -#define SKE_MIS 0x18 -#define SKE_ICR 0x1C - -/* - * Keypad module - */ - -/** - * struct keypad_platform_data - structure for platform specific data - * @init: pointer to keypad init function - * @exit: pointer to keypad deinitialisation function - * @keymap_data: matrix scan code table for keycodes - * @krow: maximum number of rows - * @kcol: maximum number of columns - * @debounce_ms: platform specific debounce time - * @no_autorepeat: flag for auto repetition - * @wakeup_enable: allow waking up the system - */ -struct ske_keypad_platform_data { - int (*init)(void); - int (*exit)(void); - const struct matrix_keymap_data *keymap_data; - u8 krow; - u8 kcol; - u8 debounce_ms; - bool no_autorepeat; - bool wakeup_enable; -}; -#endif /*__SKE_KPD_H*/ -- cgit v1.2.3 From 5bc46b49c828a6dfaab80b71ecb63fe76a1096d2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 22 Jul 2024 14:02:20 +0200 Subject: nvme-tcp: check for invalidated or revoked key key_lookup() will always return a key, even if that key is revoked or invalidated. So check for invalid keys before continuing. 
Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme-keyring.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index e10333d78dbb..19d2b256180f 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -12,7 +12,7 @@ key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); - +struct key *nvme_tls_key_lookup(key_serial_t key_id); #else static inline key_serial_t nvme_tls_psk_default(struct key *keyring, @@ -24,5 +24,9 @@ static inline key_serial_t nvme_keyring_id(void) { return 0; } +static inline struct key *nvme_tls_key_lookup(key_serial_t key_id) +{ + return ERR_PTR(-ENOTSUPP); +} #endif /* !CONFIG_NVME_KEYRING */ #endif /* _NVME_KEYRING_H */ -- cgit v1.2.3 From 5ac86f0ed04bce41242167ffa12ad92038788a95 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jul 2024 16:15:55 -0700 Subject: virt: vbox: struct vmmdev_hgcm_pagelist: Replace 1-element array with flexible array Replace the deprecated[1] use of a 1-element array in struct vmmdev_hgcm_pagelist with a modern flexible array. As this is UAPI, we cannot trivially change the size of the struct, so use a union to retain the old first element's size, but switch "pages" to a flexible array. No binary differences are present after this conversion. Link: https://github.com/KSPP/linux/issues/79 [1] Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240710231555.work.406-kees@kernel.org Signed-off-by: Kees Cook --- include/uapi/linux/vbox_vmmdev_types.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h index f8a8d6b3c521..6073858d52a2 100644 --- a/include/uapi/linux/vbox_vmmdev_types.h +++ b/include/uapi/linux/vbox_vmmdev_types.h @@ -282,7 +282,10 @@ struct vmmdev_hgcm_pagelist { __u32 flags; /** VMMDEV_HGCM_F_PARM_*. */ __u16 offset_first_page; /** Data offset in the first page. */ __u16 page_count; /** Number of pages. */ - __u64 pages[1]; /** Page addresses. */ + union { + __u64 unused; /** Deprecated place-holder for first "pages" entry. */ + __DECLARE_FLEX_ARRAY(__u64, pages); /** Page addresses. */ + }; }; VMMDEV_ASSERT_SIZE(vmmdev_hgcm_pagelist, 4 + 2 + 2 + 8); -- cgit v1.2.3 From 559048d156ff3391c4b793779a824c9193e20442 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 5 Aug 2024 14:43:44 -0700 Subject: string: Check for "nonstring" attribute on strscpy() arguments GCC already checks for arguments that are marked with the "nonstring"[1] attribute when used on standard C String API functions (e.g. strcpy). Gain this compile-time checking also for the kernel's primary string copying function, strscpy(). Note that Clang has neither "nonstring" nor __builtin_has_attribute(). 
Link: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute [1] Reviewed-by: Miguel Ojeda Tested-by: Miguel Ojeda Link: https://lore.kernel.org/r/20240805214340.work.339-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/compiler.h | 3 +++ include/linux/compiler_types.h | 7 +++++++ include/linux/string.h | 12 ++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2df665fa2964..ec55bcce4146 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -242,6 +242,9 @@ static inline void *offset_to_ptr(const int *off) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ +#define __must_be_cstr(p) BUILD_BUG_ON_ZERO(__annotated(p, nonstring)) + /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index f14c275950b5..1a957ea2f4fe 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -421,6 +421,13 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif +/* Determine if an attribute has been applied to a variable. */ +#if __has_builtin(__builtin_has_attribute) +#define __annotated(var, attr) __builtin_has_attribute(var, attr) +#else +#define __annotated(var, attr) (false) +#endif + /* * Some versions of gcc do not mark 'asm goto' volatile: * diff --git a/include/linux/string.h b/include/linux/string.h index 9edace076ddb..95b3fc308f4f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -76,12 +76,16 @@ ssize_t sized_strscpy(char *, const char *, size_t); * known size. */ #define __strscpy0(dst, src, ...) \ - sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst)) -#define __strscpy1(dst, src, size) sized_strscpy(dst, src, size) + sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst) + \ + __must_be_cstr(dst) + __must_be_cstr(src)) +#define __strscpy1(dst, src, size) \ + sized_strscpy(dst, src, size + __must_be_cstr(dst) + __must_be_cstr(src)) #define __strscpy_pad0(dst, src, ...) \ - sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst)) -#define __strscpy_pad1(dst, src, size) sized_strscpy_pad(dst, src, size) + sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst) + \ + __must_be_cstr(dst) + __must_be_cstr(src)) +#define __strscpy_pad1(dst, src, size) \ + sized_strscpy_pad(dst, src, size + __must_be_cstr(dst) + __must_be_cstr(src)) /** * strscpy - Copy a C-string into a sized buffer -- cgit v1.2.3 From 89835a58f5f54d52537709f2513fb91024e2d069 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 21 Aug 2024 08:54:11 +0300 Subject: scsi: ufs: Move UFS trace events to private header UFS trace events are called exclusively from the UFS core drivers. Make those events private to the core driver. The MAINTAINERS file does not need updating as the maintainership remains the same and the relevant directory is already covered. Reviewed-by: Bart Van Assche Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20240821055411.3128159-1-avri.altman@wdc.com Acked-by: Bean Huo Signed-off-by: Martin K. 
Petersen --- include/trace/events/ufs.h | 399 --------------------------------------------- include/ufs/ufs.h | 4 +- 2 files changed, 2 insertions(+), 401 deletions(-) delete mode 100644 include/trace/events/ufs.h (limited to 'include') diff --git a/include/trace/events/ufs.h b/include/trace/events/ufs.h deleted file mode 100644 index c4e209fbdfbb..000000000000 --- a/include/trace/events/ufs.h +++ /dev/null @@ -1,399 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. - */ - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ufs - -#if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_UFS_H - -#include - -#define str_opcode(opcode) \ - __print_symbolic(opcode, \ - { WRITE_16, "WRITE_16" }, \ - { WRITE_10, "WRITE_10" }, \ - { READ_16, "READ_16" }, \ - { READ_10, "READ_10" }, \ - { SYNCHRONIZE_CACHE, "SYNC" }, \ - { UNMAP, "UNMAP" }) - -#define UFS_LINK_STATES \ - EM(UIC_LINK_OFF_STATE, "UIC_LINK_OFF_STATE") \ - EM(UIC_LINK_ACTIVE_STATE, "UIC_LINK_ACTIVE_STATE") \ - EMe(UIC_LINK_HIBERN8_STATE, "UIC_LINK_HIBERN8_STATE") - -#define UFS_PWR_MODES \ - EM(UFS_ACTIVE_PWR_MODE, "UFS_ACTIVE_PWR_MODE") \ - EM(UFS_SLEEP_PWR_MODE, "UFS_SLEEP_PWR_MODE") \ - EM(UFS_POWERDOWN_PWR_MODE, "UFS_POWERDOWN_PWR_MODE") \ - EMe(UFS_DEEPSLEEP_PWR_MODE, "UFS_DEEPSLEEP_PWR_MODE") - -#define UFSCHD_CLK_GATING_STATES \ - EM(CLKS_OFF, "CLKS_OFF") \ - EM(CLKS_ON, "CLKS_ON") \ - EM(REQ_CLKS_OFF, "REQ_CLKS_OFF") \ - EMe(REQ_CLKS_ON, "REQ_CLKS_ON") - -#define UFS_CMD_TRACE_STRINGS \ - EM(UFS_CMD_SEND, "send_req") \ - EM(UFS_CMD_COMP, "complete_rsp") \ - EM(UFS_DEV_COMP, "dev_complete") \ - EM(UFS_QUERY_SEND, "query_send") \ - EM(UFS_QUERY_COMP, "query_complete") \ - EM(UFS_QUERY_ERR, "query_complete_err") \ - EM(UFS_TM_SEND, "tm_send") \ - EM(UFS_TM_COMP, "tm_complete") \ - EMe(UFS_TM_ERR, "tm_complete_err") - -#define UFS_CMD_TRACE_TSF_TYPES \ - EM(UFS_TSF_CDB, "CDB") \ - EM(UFS_TSF_OSF, "OSF") \ - EM(UFS_TSF_TM_INPUT, "TM_INPUT") \ - EMe(UFS_TSF_TM_OUTPUT, "TM_OUTPUT") - -/* Enums require being exported to userspace, for user tool parsing */ -#undef EM -#undef EMe -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -UFS_LINK_STATES; -UFS_PWR_MODES; -UFSCHD_CLK_GATING_STATES; -UFS_CMD_TRACE_STRINGS -UFS_CMD_TRACE_TSF_TYPES - -/* - * Now redefine the EM() and EMe() macros to map the enums to the strings - * that will be printed in the output. 
- */ -#undef EM -#undef EMe -#define EM(a, b) {a, b}, -#define EMe(a, b) {a, b} - -#define show_ufs_cmd_trace_str(str_t) \ - __print_symbolic(str_t, UFS_CMD_TRACE_STRINGS) -#define show_ufs_cmd_trace_tsf(tsf) \ - __print_symbolic(tsf, UFS_CMD_TRACE_TSF_TYPES) - -TRACE_EVENT(ufshcd_clk_gating, - - TP_PROTO(const char *dev_name, int state), - - TP_ARGS(dev_name, state), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(int, state) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->state = state; - ), - - TP_printk("%s: gating state changed to %s", - __get_str(dev_name), - __print_symbolic(__entry->state, UFSCHD_CLK_GATING_STATES)) -); - -TRACE_EVENT(ufshcd_clk_scaling, - - TP_PROTO(const char *dev_name, const char *state, const char *clk, - u32 prev_state, u32 curr_state), - - TP_ARGS(dev_name, state, clk, prev_state, curr_state), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __string(state, state) - __string(clk, clk) - __field(u32, prev_state) - __field(u32, curr_state) - ), - - TP_fast_assign( - __assign_str(dev_name); - __assign_str(state); - __assign_str(clk); - __entry->prev_state = prev_state; - __entry->curr_state = curr_state; - ), - - TP_printk("%s: %s %s from %u to %u Hz", - __get_str(dev_name), __get_str(state), __get_str(clk), - __entry->prev_state, __entry->curr_state) -); - -TRACE_EVENT(ufshcd_auto_bkops_state, - - TP_PROTO(const char *dev_name, const char *state), - - TP_ARGS(dev_name, state), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __string(state, state) - ), - - TP_fast_assign( - __assign_str(dev_name); - __assign_str(state); - ), - - TP_printk("%s: auto bkops - %s", - __get_str(dev_name), __get_str(state)) -); - -DECLARE_EVENT_CLASS(ufshcd_profiling_template, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - - TP_ARGS(dev_name, profile_info, time_us, err), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __string(profile_info, profile_info) - __field(s64, time_us) - __field(int, err) - ), - - TP_fast_assign( - __assign_str(dev_name); - __assign_str(profile_info); - __entry->time_us = time_us; - __entry->err = err; - ), - - TP_printk("%s: %s: took %lld usecs, err %d", - __get_str(dev_name), __get_str(profile_info), - __entry->time_us, __entry->err) -); - -DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_hibern8, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - TP_ARGS(dev_name, profile_info, time_us, err)); - -DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_clk_gating, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - TP_ARGS(dev_name, profile_info, time_us, err)); - -DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_clk_scaling, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - TP_ARGS(dev_name, profile_info, time_us, err)); - -DECLARE_EVENT_CLASS(ufshcd_template, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - - TP_ARGS(dev_name, err, usecs, dev_state, link_state), - - TP_STRUCT__entry( - __field(s64, usecs) - __field(int, err) - __string(dev_name, dev_name) - __field(int, dev_state) - __field(int, link_state) - ), - - TP_fast_assign( - __entry->usecs = usecs; - __entry->err = err; - __assign_str(dev_name); - __entry->dev_state = dev_state; - __entry->link_state = link_state; - ), - - TP_printk( - "%s: took %lld usecs, dev_state: %s, link_state: %s, err %d", - __get_str(dev_name), - __entry->usecs, - 
__print_symbolic(__entry->dev_state, UFS_PWR_MODES), - __print_symbolic(__entry->link_state, UFS_LINK_STATES), - __entry->err - ) -); - -DEFINE_EVENT(ufshcd_template, ufshcd_system_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_system_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_runtime_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_runtime_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_init, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -TRACE_EVENT(ufshcd_command, - TP_PROTO(struct scsi_device *sdev, enum ufs_trace_str_t str_t, - unsigned int tag, u32 doorbell, u32 hwq_id, int transfer_len, - u32 intr, u64 lba, u8 opcode, u8 group_id), - - TP_ARGS(sdev, str_t, tag, doorbell, hwq_id, transfer_len, intr, lba, - opcode, group_id), - - TP_STRUCT__entry( - __field(struct scsi_device *, sdev) - __field(enum ufs_trace_str_t, str_t) - __field(unsigned int, tag) - __field(u32, doorbell) - __field(u32, hwq_id) - __field(u32, intr) - __field(u64, lba) - __field(int, transfer_len) - __field(u8, opcode) - __field(u8, group_id) - ), - - TP_fast_assign( - __entry->sdev = sdev; - __entry->str_t = str_t; - __entry->tag = tag; - __entry->doorbell = doorbell; - __entry->hwq_id = hwq_id; - __entry->intr = intr; - __entry->lba = lba; - __entry->transfer_len = transfer_len; - __entry->opcode = opcode; - __entry->group_id = group_id; - ), - - TP_printk( - "%s: %s: tag: %u, DB: 0x%x, size: %d, IS: %u, LBA: %llu, opcode: 0x%x (%s), group_id: 0x%x, hwq_id: %d", - show_ufs_cmd_trace_str(__entry->str_t), - dev_name(&__entry->sdev->sdev_dev), __entry->tag, - __entry->doorbell, __entry->transfer_len, __entry->intr, - __entry->lba, (u32)__entry->opcode, str_opcode(__entry->opcode), - (u32)__entry->group_id, __entry->hwq_id - ) -); - -TRACE_EVENT(ufshcd_uic_command, - TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, u32 cmd, - u32 arg1, u32 arg2, u32 arg3), - - TP_ARGS(dev_name, str_t, cmd, arg1, arg2, arg3), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(enum ufs_trace_str_t, str_t) - __field(u32, cmd) - __field(u32, arg1) - __field(u32, 
arg2) - __field(u32, arg3) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->str_t = str_t; - __entry->cmd = cmd; - __entry->arg1 = arg1; - __entry->arg2 = arg2; - __entry->arg3 = arg3; - ), - - TP_printk( - "%s: %s: cmd: 0x%x, arg1: 0x%x, arg2: 0x%x, arg3: 0x%x", - show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), - __entry->cmd, __entry->arg1, __entry->arg2, __entry->arg3 - ) -); - -TRACE_EVENT(ufshcd_upiu, - TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, void *hdr, - void *tsf, enum ufs_trace_tsf_t tsf_t), - - TP_ARGS(dev_name, str_t, hdr, tsf, tsf_t), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(enum ufs_trace_str_t, str_t) - __array(unsigned char, hdr, 12) - __array(unsigned char, tsf, 16) - __field(enum ufs_trace_tsf_t, tsf_t) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->str_t = str_t; - memcpy(__entry->hdr, hdr, sizeof(__entry->hdr)); - memcpy(__entry->tsf, tsf, sizeof(__entry->tsf)); - __entry->tsf_t = tsf_t; - ), - - TP_printk( - "%s: %s: HDR:%s, %s:%s", - show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), - __print_hex(__entry->hdr, sizeof(__entry->hdr)), - show_ufs_cmd_trace_tsf(__entry->tsf_t), - __print_hex(__entry->tsf, sizeof(__entry->tsf)) - ) -); - -TRACE_EVENT(ufshcd_exception_event, - - TP_PROTO(const char *dev_name, u16 status), - - TP_ARGS(dev_name, status), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(u16, status) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->status = status; - ), - - TP_printk("%s: status 0x%x", - __get_str(dev_name), __entry->status - ) -); - -#endif /* if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) */ - -/* This part must be outside protection */ -#include diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 853e95957c31..e594abe5d05f 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -597,7 +597,7 @@ struct ufs_dev_info { }; /* - * This enum is used in string mapping in include/trace/events/ufs.h. + * This enum is used in string mapping in ufs_trace.h. */ enum ufs_trace_str_t { UFS_CMD_SEND, UFS_CMD_COMP, UFS_DEV_COMP, @@ -607,7 +607,7 @@ enum ufs_trace_str_t { /* * Transaction Specific Fields (TSF) type in the UPIU package, this enum is - * used in include/trace/events/ufs.h for UFS command trace. + * used in ufs_trace.h for UFS command trace. */ enum ufs_trace_tsf_t { UFS_TSF_CDB, UFS_TSF_OSF, UFS_TSF_TM_INPUT, UFS_TSF_TM_OUTPUT -- cgit v1.2.3 From d77381c2f62a557f630a64280ff09675128be363 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Aug 2024 12:59:10 -0700 Subject: scsi: fcoe: Simplify alloc_ordered_workqueue() invocations Let alloc_ordered_workqueue() format the workqueue name instead of calling snprintf() explicitly. Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240822195944.654691-7-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- include/scsi/fcoe_sysfs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/scsi/fcoe_sysfs.h b/include/scsi/fcoe_sysfs.h index 4b1216de3f22..2b28a05e492b 100644 --- a/include/scsi/fcoe_sysfs.h +++ b/include/scsi/fcoe_sysfs.h @@ -50,9 +50,7 @@ struct fcoe_ctlr_device { struct fcoe_sysfs_function_template *f; struct list_head fcfs; - char work_q_name[20]; struct workqueue_struct *work_q; - char devloss_work_q_name[20]; struct workqueue_struct *devloss_work_q; struct mutex lock; -- cgit v1.2.3 From 06d53789761cad6bbbc3c00e8c96a631a41f4bf3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Aug 2024 12:59:19 -0700 Subject: scsi: scsi_transport_fc: Simplify alloc_workqueue() invocations Let alloc_workqueue() format the workqueue name instead of calling snprintf() explicitly. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240822195944.654691-16-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_transport_fc.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 4b884b8013e0..8e6c60090c62 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -575,9 +575,7 @@ struct fc_host_attrs { u16 npiv_vports_inuse; /* work queues for rport state manipulation */ - char work_q_name[20]; struct workqueue_struct *work_q; - char devloss_work_q_name[20]; struct workqueue_struct *devloss_work_q; /* bsg support */ @@ -654,12 +652,8 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->next_vport_number) #define fc_host_npiv_vports_inuse(x) \ (((struct fc_host_attrs *)(x)->shost_data)->npiv_vports_inuse) -#define fc_host_work_q_name(x) \ - (((struct fc_host_attrs *)(x)->shost_data)->work_q_name) #define fc_host_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->work_q) -#define fc_host_devloss_work_q_name(x) \ - (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name) #define fc_host_devloss_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) #define fc_host_dev_loss_tmo(x) \ -- cgit v1.2.3 From ba52850cb6b4db5f4ec4636c73d2ad85d0e9adba Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Aug 2024 12:59:22 -0700 Subject: scsi: core: Simplify an alloc_workqueue() invocation Let alloc_workqueue() format the workqueue name. Remove the work_q_name[] member from struct Scsi_Host because it is no longer used by any SCSI driver nor by the SCSI core. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240822195944.654691-19-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 19a1c5c48935..2b4ab0369ffb 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -677,7 +677,6 @@ struct Scsi_Host { /* * Optional work queue to be utilized by the transport */ - char work_q_name[20]; struct workqueue_struct *work_q; /* -- cgit v1.2.3 From 54f2f78d6b9f1f90e91aaf5dbb34a6198f65fdfd Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 16:30:13 +0100 Subject: xfrm: Correct spelling in xfrm.h Correct spelling in xfrm.h. As reported by codespell. 
Signed-off-by: Simon Horman Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 54cef89f6c1e..f7244ac4fa08 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -79,7 +79,7 @@ policy entry has list of up to XFRM_MAX_DEPTH transformations, described by templates xfrm_tmpl. Each template is resolved to a complete xfrm_state (see below) and we pack bundle of transformations - to a dst_entry returned to requestor. + to a dst_entry returned to requester. dst -. xfrm .-> xfrm_state #1 |---. child .-> dst -. xfrm .-> xfrm_state #2 @@ -1016,7 +1016,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { int link; /* ifindex of underlying L2 interface */ - u32 if_id; /* interface identifyer */ + u32 if_id; /* interface identifier */ bool collect_md; }; -- cgit v1.2.3 From 997daa8de64ccbb4dc68d250510893597d485de4 Mon Sep 17 00:00:00 2001 From: Sunyeal Hong Date: Thu, 22 Aug 2024 08:26:49 +0900 Subject: dt-bindings: clock: add ExynosAuto v920 SoC CMU bindings Add dt-schema for ExynosAuto v920 SoC clock controller. Add device tree clock binding definitions for below CMU blocks. - CMU_TOP - CMU_PERIC0/1 - CMU_MISC - CMU_HSI0/1 Signed-off-by: Sunyeal Hong Link: https://lore.kernel.org/r/20240821232652.1077701-2-sunyeal.hong@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynosautov920.h | 191 +++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 include/dt-bindings/clock/samsung,exynosautov920.h (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynosautov920.h b/include/dt-bindings/clock/samsung,exynosautov920.h new file mode 100644 index 000000000000..c720f344b6bf --- /dev/null +++ b/include/dt-bindings/clock/samsung,exynosautov920.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. + * Author: Sunyeal Hong + * + * Device Tree binding constants for ExynosAuto v920 clock controller. 
+ */ + +#ifndef _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H +#define _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H + +/* CMU_TOP */ +#define FOUT_SHARED0_PLL 1 +#define FOUT_SHARED1_PLL 2 +#define FOUT_SHARED2_PLL 3 +#define FOUT_SHARED3_PLL 4 +#define FOUT_SHARED4_PLL 5 +#define FOUT_SHARED5_PLL 6 +#define FOUT_MMC_PLL 7 + +/* MUX in CMU_TOP */ +#define MOUT_SHARED0_PLL 8 +#define MOUT_SHARED1_PLL 9 +#define MOUT_SHARED2_PLL 10 +#define MOUT_SHARED3_PLL 11 +#define MOUT_SHARED4_PLL 12 +#define MOUT_SHARED5_PLL 13 +#define MOUT_MMC_PLL 14 +#define MOUT_CLKCMU_CMU_BOOST 15 +#define MOUT_CLKCMU_CMU_CMUREF 16 +#define MOUT_CLKCMU_ACC_NOC 17 +#define MOUT_CLKCMU_ACC_ORB 18 +#define MOUT_CLKCMU_APM_NOC 19 +#define MOUT_CLKCMU_AUD_CPU 20 +#define MOUT_CLKCMU_AUD_NOC 21 +#define MOUT_CLKCMU_CPUCL0_SWITCH 22 +#define MOUT_CLKCMU_CPUCL0_CLUSTER 23 +#define MOUT_CLKCMU_CPUCL0_DBG 24 +#define MOUT_CLKCMU_CPUCL1_SWITCH 25 +#define MOUT_CLKCMU_CPUCL1_CLUSTER 26 +#define MOUT_CLKCMU_CPUCL2_SWITCH 27 +#define MOUT_CLKCMU_CPUCL2_CLUSTER 28 +#define MOUT_CLKCMU_DNC_NOC 29 +#define MOUT_CLKCMU_DPTX_NOC 30 +#define MOUT_CLKCMU_DPTX_DPGTC 31 +#define MOUT_CLKCMU_DPTX_DPOSC 32 +#define MOUT_CLKCMU_DPUB_NOC 33 +#define MOUT_CLKCMU_DPUB_DSIM 34 +#define MOUT_CLKCMU_DPUF0_NOC 35 +#define MOUT_CLKCMU_DPUF1_NOC 36 +#define MOUT_CLKCMU_DPUF2_NOC 37 +#define MOUT_CLKCMU_DSP_NOC 38 +#define MOUT_CLKCMU_G3D_SWITCH 39 +#define MOUT_CLKCMU_G3D_NOCP 40 +#define MOUT_CLKCMU_GNPU_NOC 41 +#define MOUT_CLKCMU_HSI0_NOC 42 +#define MOUT_CLKCMU_HSI1_NOC 43 +#define MOUT_CLKCMU_HSI1_USBDRD 44 +#define MOUT_CLKCMU_HSI1_MMC_CARD 45 +#define MOUT_CLKCMU_HSI2_NOC 46 +#define MOUT_CLKCMU_HSI2_NOC_UFS 47 +#define MOUT_CLKCMU_HSI2_UFS_EMBD 48 +#define MOUT_CLKCMU_HSI2_ETHERNET 49 +#define MOUT_CLKCMU_ISP_NOC 50 +#define MOUT_CLKCMU_M2M_NOC 51 +#define MOUT_CLKCMU_M2M_JPEG 52 +#define MOUT_CLKCMU_MFC_MFC 53 +#define MOUT_CLKCMU_MFC_WFD 54 +#define MOUT_CLKCMU_MFD_NOC 55 +#define MOUT_CLKCMU_MIF_SWITCH 56 +#define MOUT_CLKCMU_MIF_NOCP 57 +#define MOUT_CLKCMU_MISC_NOC 58 +#define MOUT_CLKCMU_NOCL0_NOC 59 +#define MOUT_CLKCMU_NOCL1_NOC 60 +#define MOUT_CLKCMU_NOCL2_NOC 61 +#define MOUT_CLKCMU_PERIC0_NOC 62 +#define MOUT_CLKCMU_PERIC0_IP 63 +#define MOUT_CLKCMU_PERIC1_NOC 64 +#define MOUT_CLKCMU_PERIC1_IP 65 +#define MOUT_CLKCMU_SDMA_NOC 66 +#define MOUT_CLKCMU_SNW_NOC 67 +#define MOUT_CLKCMU_SSP_NOC 68 +#define MOUT_CLKCMU_TAA_NOC 69 + +/* DIV in CMU_TOP */ +#define DOUT_SHARED0_DIV1 70 +#define DOUT_SHARED0_DIV2 71 +#define DOUT_SHARED0_DIV3 72 +#define DOUT_SHARED0_DIV4 73 +#define DOUT_SHARED1_DIV1 74 +#define DOUT_SHARED1_DIV2 75 +#define DOUT_SHARED1_DIV3 76 +#define DOUT_SHARED1_DIV4 77 +#define DOUT_SHARED2_DIV1 78 +#define DOUT_SHARED2_DIV2 79 +#define DOUT_SHARED2_DIV3 80 +#define DOUT_SHARED2_DIV4 81 +#define DOUT_SHARED3_DIV1 82 +#define DOUT_SHARED3_DIV2 83 +#define DOUT_SHARED3_DIV3 84 +#define DOUT_SHARED3_DIV4 85 +#define DOUT_SHARED4_DIV1 86 +#define DOUT_SHARED4_DIV2 87 +#define DOUT_SHARED4_DIV3 88 +#define DOUT_SHARED4_DIV4 89 +#define DOUT_SHARED5_DIV1 90 +#define DOUT_SHARED5_DIV2 91 +#define DOUT_SHARED5_DIV3 92 +#define DOUT_SHARED5_DIV4 93 +#define DOUT_CLKCMU_CMU_BOOST 94 +#define DOUT_CLKCMU_ACC_NOC 95 +#define DOUT_CLKCMU_ACC_ORB 96 +#define DOUT_CLKCMU_APM_NOC 97 +#define DOUT_CLKCMU_AUD_CPU 98 +#define DOUT_CLKCMU_AUD_NOC 99 +#define DOUT_CLKCMU_CPUCL0_SWITCH 100 +#define DOUT_CLKCMU_CPUCL0_CLUSTER 101 +#define DOUT_CLKCMU_CPUCL0_DBG 102 +#define DOUT_CLKCMU_CPUCL1_SWITCH 103 +#define DOUT_CLKCMU_CPUCL1_CLUSTER 104 +#define 
DOUT_CLKCMU_CPUCL2_SWITCH 105 +#define DOUT_CLKCMU_CPUCL2_CLUSTER 106 +#define DOUT_CLKCMU_DNC_NOC 107 +#define DOUT_CLKCMU_DPTX_NOC 108 +#define DOUT_CLKCMU_DPTX_DPGTC 109 +#define DOUT_CLKCMU_DPTX_DPOSC 110 +#define DOUT_CLKCMU_DPUB_NOC 111 +#define DOUT_CLKCMU_DPUB_DSIM 112 +#define DOUT_CLKCMU_DPUF0_NOC 113 +#define DOUT_CLKCMU_DPUF1_NOC 114 +#define DOUT_CLKCMU_DPUF2_NOC 115 +#define DOUT_CLKCMU_DSP_NOC 116 +#define DOUT_CLKCMU_G3D_SWITCH 117 +#define DOUT_CLKCMU_G3D_NOCP 118 +#define DOUT_CLKCMU_GNPU_NOC 119 +#define DOUT_CLKCMU_HSI0_NOC 120 +#define DOUT_CLKCMU_HSI1_NOC 121 +#define DOUT_CLKCMU_HSI1_USBDRD 122 +#define DOUT_CLKCMU_HSI1_MMC_CARD 123 +#define DOUT_CLKCMU_HSI2_NOC 124 +#define DOUT_CLKCMU_HSI2_NOC_UFS 125 +#define DOUT_CLKCMU_HSI2_UFS_EMBD 126 +#define DOUT_CLKCMU_HSI2_ETHERNET 127 +#define DOUT_CLKCMU_ISP_NOC 128 +#define DOUT_CLKCMU_M2M_NOC 129 +#define DOUT_CLKCMU_M2M_JPEG 130 +#define DOUT_CLKCMU_MFC_MFC 131 +#define DOUT_CLKCMU_MFC_WFD 132 +#define DOUT_CLKCMU_MFD_NOC 133 +#define DOUT_CLKCMU_MIF_NOCP 134 +#define DOUT_CLKCMU_MISC_NOC 135 +#define DOUT_CLKCMU_NOCL0_NOC 136 +#define DOUT_CLKCMU_NOCL1_NOC 137 +#define DOUT_CLKCMU_NOCL2_NOC 138 +#define DOUT_CLKCMU_PERIC0_NOC 139 +#define DOUT_CLKCMU_PERIC0_IP 140 +#define DOUT_CLKCMU_PERIC1_NOC 141 +#define DOUT_CLKCMU_PERIC1_IP 142 +#define DOUT_CLKCMU_SDMA_NOC 143 +#define DOUT_CLKCMU_SNW_NOC 144 +#define DOUT_CLKCMU_SSP_NOC 145 +#define DOUT_CLKCMU_TAA_NOC 146 + +/* CMU_PERIC0 */ +#define CLK_MOUT_PERIC0_IP_USER 1 +#define CLK_MOUT_PERIC0_NOC_USER 2 +#define CLK_MOUT_PERIC0_USI00_USI 3 +#define CLK_MOUT_PERIC0_USI01_USI 4 +#define CLK_MOUT_PERIC0_USI02_USI 5 +#define CLK_MOUT_PERIC0_USI03_USI 6 +#define CLK_MOUT_PERIC0_USI04_USI 7 +#define CLK_MOUT_PERIC0_USI05_USI 8 +#define CLK_MOUT_PERIC0_USI06_USI 9 +#define CLK_MOUT_PERIC0_USI07_USI 10 +#define CLK_MOUT_PERIC0_USI08_USI 11 +#define CLK_MOUT_PERIC0_USI_I2C 12 +#define CLK_MOUT_PERIC0_I3C 13 + +#define CLK_DOUT_PERIC0_USI00_USI 14 +#define CLK_DOUT_PERIC0_USI01_USI 15 +#define CLK_DOUT_PERIC0_USI02_USI 16 +#define CLK_DOUT_PERIC0_USI03_USI 17 +#define CLK_DOUT_PERIC0_USI04_USI 18 +#define CLK_DOUT_PERIC0_USI05_USI 19 +#define CLK_DOUT_PERIC0_USI06_USI 20 +#define CLK_DOUT_PERIC0_USI07_USI 21 +#define CLK_DOUT_PERIC0_USI08_USI 22 +#define CLK_DOUT_PERIC0_USI_I2C 23 +#define CLK_DOUT_PERIC0_I3C 24 + +#endif /* _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H */ -- cgit v1.2.3 From b58b133e680b20d219940e0fdb6f6132c2b60f38 Mon Sep 17 00:00:00 2001 From: Pranjal Shrivastava Date: Fri, 16 Aug 2024 10:49:06 +0000 Subject: iommu: Handle iommu faults for a bad iopf setup The iommu_report_device_fault function was updated to return void while assuming that drivers only need to call iommu_report_device_fault() for reporting an iopf. This implementation causes following problems: 1. The drivers rely on the core code to call it's page_reponse, however, when a fault is received and no fault capable domain is attached / iopf_param is NULL, the ops->page_response is NOT called causing the device to stall in case the fault type was PAGE_REQ. 2. The arm_smmu_v3 driver relies on the returned value to log errors returning void from iommu_report_device_fault causes these events to be missed while logging. Modify the iommu_report_device_fault function to return -EINVAL for cases where no fault capable domain is attached or iopf_param was NULL and calls back to the driver (ops->page_response) in case the fault type was IOMMU_FAULT_PAGE_REQ. 
The returned value can be used by the drivers to log the fault/event as needed. Reported-by: Kunkun Jiang Closes: https://lore.kernel.org/all/6147caf0-b9a0-30ca-795e-a1aa502a5c51@huawei.com/ Fixes: 3dfa64aecbaf ("iommu: Make iommu_report_device_fault() return void") Signed-off-by: Jason Gunthorpe Signed-off-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240816104906.1010626-1-praan@google.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 04cbdae0052e..bd722f473635 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1563,7 +1563,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); void iopf_free_group(struct iopf_group *group); -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); void iopf_group_response(struct iopf_group *group, enum iommu_page_response_code status); #else @@ -1601,9 +1601,10 @@ static inline void iopf_free_group(struct iopf_group *group) { } -static inline void +static inline int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { + return -ENODEV; } static inline void iopf_group_response(struct iopf_group *group, -- cgit v1.2.3 From 84429b675bcfd2a518ae167ee4661cdf7539aa7d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 22 Aug 2024 15:50:09 +0200 Subject: fs: Allow fine-grained control of folio sizes We need filesystems to be able to communicate acceptable folio sizes to the pagecache for a variety of uses (e.g. large block sizes). Support a range of folio sizes between order-0 and order-31. Signed-off-by: Matthew Wilcox (Oracle) Co-developed-by: Pankaj Raghav Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20240822135018.1931258-2-kernel@pankajraghav.com Tested-by: David Howells Reviewed-by: Hannes Reinecke Reviewed-by: Darrick J. 
Wong Reviewed-by: Daniel Gomez Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 90 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9c7edb6422b..c60025bb584c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -204,14 +204,21 @@ enum mapping_flags { AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, - AS_LARGE_FOLIO_SUPPORT = 6, - AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ - AS_STABLE_WRITES, /* must wait for writeback before modifying + AS_RELEASE_ALWAYS = 6, /* Call ->release_folio(), even if no private data */ + AS_STABLE_WRITES = 7, /* must wait for writeback before modifying folio contents */ - AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping, - including to move the mapping */ + AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ + /* Bits 16-25 are used for FOLIO_ORDER */ + AS_FOLIO_ORDER_BITS = 5, + AS_FOLIO_ORDER_MIN = 16, + AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS, }; +#define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1) +#define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN) +#define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX) +#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK) + /** * mapping_set_error - record a writeback error in the address_space * @mapping: the mapping in which an error should be set @@ -367,9 +374,51 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) #define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) #define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) +/* + * mapping_set_folio_order_range() - Set the orders supported by a file. + * @mapping: The address space of the file. + * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive). + * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive). + * + * The filesystem should call this function in its inode constructor to + * indicate which base size (min) and maximum size (max) of folio the VFS + * can use to cache the contents of the file. This should only be used + * if the filesystem needs special handling of folio sizes (ie there is + * something the core cannot know). + * Do not tune it based on, eg, i_size. + * + * Context: This should not be called while the inode is active as it + * is non-atomic. + */ +static inline void mapping_set_folio_order_range(struct address_space *mapping, + unsigned int min, + unsigned int max) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return; + + if (min > MAX_PAGECACHE_ORDER) + min = MAX_PAGECACHE_ORDER; + + if (max > MAX_PAGECACHE_ORDER) + max = MAX_PAGECACHE_ORDER; + + if (max < min) + max = min; + + mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) | + (min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX); +} + +static inline void mapping_set_folio_min_order(struct address_space *mapping, + unsigned int min) +{ + mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER); +} + /** * mapping_set_large_folios() - Indicate the file supports large folios. - * @mapping: The file. + * @mapping: The address space of the file. 
* * The filesystem should call this function in its inode constructor to * indicate that the VFS can use large folios to cache the contents of @@ -380,7 +429,23 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) */ static inline void mapping_set_large_folios(struct address_space *mapping) { - __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); + mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER); +} + +static inline unsigned int +mapping_max_folio_order(const struct address_space *mapping) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return 0; + return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX; +} + +static inline unsigned int +mapping_min_folio_order(const struct address_space *mapping) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return 0; + return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN; } /* @@ -389,20 +454,17 @@ static inline void mapping_set_large_folios(struct address_space *mapping) */ static inline bool mapping_large_folio_support(struct address_space *mapping) { - /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */ + /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, "Anonymous mapping always supports large folio"); - return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && - test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); + return mapping_max_folio_order(mapping) > 0; } /* Return the maximum folio size for this pagecache mapping, in bytes. */ -static inline size_t mapping_max_folio_size(struct address_space *mapping) +static inline size_t mapping_max_folio_size(const struct address_space *mapping) { - if (mapping_large_folio_support(mapping)) - return PAGE_SIZE << MAX_PAGECACHE_ORDER; - return PAGE_SIZE; + return PAGE_SIZE << mapping_max_folio_order(mapping); } static inline int filemap_nr_thps(struct address_space *mapping) -- cgit v1.2.3 From ab95d23bab220ef845c0d422f49452a475330eaf Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Thu, 22 Aug 2024 15:50:10 +0200 Subject: filemap: allocate mapping_min_order folios in the page cache filemap_create_folio() and do_read_cache_folio() were always allocating folio of order 0. __filemap_get_folio was trying to allocate higher order folios when fgp_flags had higher order hint set but it will default to order 0 folio if higher order memory allocation fails. Supporting mapping_min_order implies that we guarantee each folio in the page cache has at least an order of mapping_min_order. When adding new folios to the page cache we must also ensure the index used is aligned to the mapping_min_order as the page cache requires the index to be aligned to the order of the folio. Co-developed-by: Luis Chamberlain Signed-off-by: Luis Chamberlain Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20240822135018.1931258-3-kernel@pankajraghav.com Tested-by: David Howells Reviewed-by: Hannes Reinecke Reviewed-by: Darrick J. 
Wong Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Daniel Gomez Reviewed-by: Dave Chinner Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c60025bb584c..4cc170949e9c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -448,6 +448,26 @@ mapping_min_folio_order(const struct address_space *mapping) return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN; } +static inline unsigned long +mapping_min_folio_nrpages(struct address_space *mapping) +{ + return 1UL << mapping_min_folio_order(mapping); +} + +/** + * mapping_align_index() - Align index for this mapping. + * @mapping: The address_space. + * + * The index of a folio must be naturally aligned. If you are adding a + * new folio to the page cache and need to know what index to give it, + * call this function. + */ +static inline pgoff_t mapping_align_index(struct address_space *mapping, + pgoff_t index) +{ + return round_down(index, mapping_min_folio_nrpages(mapping)); +} + /* * Large folio support currently depends on THP. These dependencies are * being worked on but are not yet fixed. -- cgit v1.2.3 From 3849687869092094003ba009dc00e2e0237a3b8a Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:55 +0200 Subject: net: phy: Introduce ethernet link topology representation Link topologies containing multiple network PHYs attached to the same net_device can be found when using a PHY as a media converter for use with an SFP connector, on which an SFP transceiver containing a PHY can be used. With the current model, the transceiver's PHY can't be used for operations such as cable testing, timestamping, macsec offload, etc. The reason being that most of the logic for these configuration, coming from either ethtool netlink or ioctls tend to use netdev->phydev, which in multi-phy systems will reference the PHY closest to the MAC. Introduce a numbering scheme allowing to enumerate PHY devices that belong to any netdev, which can in turn allow userspace to take more precise decisions with regard to each PHY's configuration. The numbering is maintained per-netdev, in a phy_device_list. The numbering works similarly to a netdevice's ifindex, with identifiers that are only recycled once INT_MAX has been reached. This prevents races that could occur between PHY listing and SFP transceiver removal/insertion. The identifiers are assigned at phy_attach time, as the numbering depends on the netdevice the phy is attached to. The PHY index can be re-used for PHYs that are persistent. Signed-off-by: Maxime Chevallier Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +- include/linux/phy.h | 4 ++ include/linux/phy_link_topology.h | 82 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/ethtool.h | 16 ++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 include/linux/phy_link_topology.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 614ec5d3d75b..289a6133769c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,7 +40,6 @@ #include #endif #include - #include #include #include @@ -81,6 +80,7 @@ struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; +struct phy_link_topology; typedef u32 xdp_features_t; @@ -1951,6 +1951,7 @@ enum netdev_reg_state { * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one + * @link_topo: Physical link topology tracking attached PHYs * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. @@ -2342,6 +2343,7 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif + struct phy_link_topology *link_topo; struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b7d40d49129..3b0f4bf80650 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -554,6 +554,9 @@ struct macsec_ops; * @drv: Pointer to the driver for this PHY instance * @devlink: Create a link between phy dev and mac dev, if the external phy * used by current mac interface is managed by another mac interface. + * @phyindex: Unique id across the phy's parent tree of phys to address the PHY + * from userspace, similar to ifindex. A zero index means the PHY + * wasn't assigned an id yet. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. @@ -656,6 +659,7 @@ struct phy_device { struct device_link *devlink; + u32 phyindex; u32 phy_id; struct phy_c45_device_ids c45_ids; diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h new file mode 100644 index 000000000000..68a59e25821c --- /dev/null +++ b/include/linux/phy_link_topology.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PHY device list allow maintaining a list of PHY devices that are + * part of a netdevice's link topology. PHYs can for example be chained, + * as is the case when using a PHY that exposes an SFP module, on which an + * SFP transceiver that embeds a PHY is connected. + * + * This list can then be used by userspace to leverage individual PHY + * capabilities. 
+ */ +#ifndef __PHY_LINK_TOPOLOGY_H +#define __PHY_LINK_TOPOLOGY_H + +#include +#include + +struct xarray; +struct phy_device; +struct sfp_bus; + +struct phy_link_topology { + struct xarray phys; + u32 next_phy_index; +}; + +struct phy_device_node { + enum phy_upstream upstream_type; + + union { + struct net_device *netdev; + struct phy_device *phydev; + } upstream; + + struct sfp_bus *parent_sfp_bus; + + struct phy_device *phy; +}; + +#if IS_ENABLED(CONFIG_PHYLIB) +int phy_link_topo_add_phy(struct net_device *dev, + struct phy_device *phy, + enum phy_upstream upt, void *upstream); + +void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy); + +static inline struct phy_device * +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) +{ + struct phy_link_topology *topo = dev->link_topo; + struct phy_device_node *pdn; + + if (!topo) + return NULL; + + pdn = xa_load(&topo->phys, phyindex); + if (pdn) + return pdn->phy; + + return NULL; +} + +#else +static inline int phy_link_topo_add_phy(struct net_device *dev, + struct phy_device *phy, + enum phy_upstream upt, void *upstream) +{ + return 0; +} + +static inline void phy_link_topo_del_phy(struct net_device *dev, + struct phy_device *phy) +{ +} + +static inline struct phy_device * +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) +{ + return NULL; +} +#endif + +#endif /* __PHY_LINK_TOPOLOGY_H */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 4a0a6e703483..c405ed63acfa 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2533,4 +2533,20 @@ struct ethtool_link_settings { * __u32 map_lp_advertising[link_mode_masks_nwords]; */ }; + +/** + * enum phy_upstream - Represents the upstream component a given PHY device + * is connected to, as in what is on the other end of the MII bus. Most PHYs + * will be attached to an Ethernet MAC controller, but in some cases, there's + * an intermediate PHY used as a media-converter, which will driver another + * MII interface as its output. + * @PHY_UPSTREAM_MAC: Upstream component is a MAC (a switch port, + * or ethernet controller) + * @PHY_UPSTREAM_PHY: Upstream component is a PHY (likely a media converter) + */ +enum phy_upstream { + PHY_UPSTREAM_MAC, + PHY_UPSTREAM_PHY, +}; + #endif /* _UAPI_LINUX_ETHTOOL_H */ -- cgit v1.2.3 From 4d76f115ab9121f4458330da962ae9ef5430e60b Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:56 +0200 Subject: net: sfp: pass the phy_device when disconnecting an sfp module's PHY Pass the phy_device as a parameter to the sfp upstream .disconnect_phy operation. This is preparatory work to help track phy devices across a net_device's link. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. 
Miller --- include/linux/sfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index b14be59550e3..54abb4d22b2e 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -550,7 +550,7 @@ struct sfp_upstream_ops { void (*link_down)(void *priv); void (*link_up)(void *priv); int (*connect_phy)(void *priv, struct phy_device *); - void (*disconnect_phy)(void *priv); + void (*disconnect_phy)(void *priv, struct phy_device *); }; #if IS_ENABLED(CONFIG_SFP) -- cgit v1.2.3 From b2db6f4ace72e71fa09b8d1354f8ac9854140d74 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:57 +0200 Subject: net: phy: add helpers to handle sfp phy connect/disconnect There are a few PHY drivers that can handle SFP modules through their sfp_upstream_ops. Introduce Phylib helpers to keep track of connected SFP PHYs in a netdevice's namespace, by adding the SFP PHY to the upstream PHY's netdev's namespace. By doing so, these SFP PHYs can be enumerated and exposed to users, which will be able to use their capabilities. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 3b0f4bf80650..a98bc91a0cde 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1781,6 +1781,8 @@ int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable); +int phy_sfp_connect_phy(void *upstream, struct phy_device *phy); +void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy); void phy_sfp_attach(void *upstream, struct sfp_bus *bus); void phy_sfp_detach(void *upstream, struct sfp_bus *bus); int phy_sfp_probe(struct phy_device *phydev, -- cgit v1.2.3 From 0a2f7de0f3b96c4a30e56d2c79f8d812f89599a1 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:58 +0200 Subject: net: sfp: Add helper to return the SFP bus name Knowing the bus name is helpful when we want to expose the link topology to userspace, add a helper to return the SFP bus name. This call will always be made while holding the RTNL which ensures that the SFP driver won't unbind from the device. The returned pointer to the bus name will only be used while RTNL is held. Signed-off-by: Maxime Chevallier Suggested-by: "Russell King (Oracle)" Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. 
Miller --- include/linux/sfp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 54abb4d22b2e..60c65cea74f6 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -576,6 +576,7 @@ struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode); int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, const struct sfp_upstream_ops *ops); void sfp_bus_del_upstream(struct sfp_bus *bus); +const char *sfp_get_name(struct sfp_bus *bus); #else static inline int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, @@ -654,6 +655,11 @@ static inline int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, static inline void sfp_bus_del_upstream(struct sfp_bus *bus) { } + +static inline const char *sfp_get_name(struct sfp_bus *bus) +{ + return NULL; +} #endif #endif -- cgit v1.2.3 From c15e065b46dc4e19837275b826c1960d55564abd Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:59 +0200 Subject: net: ethtool: Allow passing a phy index for some commands Some netlink commands are target towards ethernet PHYs, to control some of their features. As there's several such commands, add the ability to pass a PHY index in the ethnl request, which will populate the generic ethnl_req_info with the passed phy_index. Add a helper that netlink command handlers need to use to grab the targeted PHY from the req_info. This helper needs to hold rtnl_lock() while interacting with the PHY, as it may be removed at any point. Signed-off-by: Maxime Chevallier Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 9074fa309bd6..49d1f9220fde 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -134,6 +134,7 @@ enum { ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ ETHTOOL_A_HEADER_DEV_NAME, /* string */ ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ + ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, -- cgit v1.2.3 From 17194be4c8e1e82d8b484e58cdcb495c0714d1fd Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:10:01 +0200 Subject: net: ethtool: Introduce a command to list PHYs on an interface As we have the ability to track the PHYs connected to a net_device through the link_topology, we can expose this list to userspace. This allows userspace to use these identifiers for phy-specific commands and take the decision of which PHY to target by knowing the link topology. Add PHY_GET and PHY_DUMP, which can be a filtered DUMP operation to list devices on only one interface. Signed-off-by: Maxime Chevallier Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. 
Miller --- include/uapi/linux/ethtool_netlink.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 49d1f9220fde..45d8bcdea056 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -58,6 +58,7 @@ enum { ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, ETHTOOL_MSG_MODULE_FW_FLASH_ACT, + ETHTOOL_MSG_PHY_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -111,6 +112,8 @@ enum { ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, ETHTOOL_MSG_MODULE_FW_FLASH_NTF, + ETHTOOL_MSG_PHY_GET_REPLY, + ETHTOOL_MSG_PHY_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -1055,6 +1058,22 @@ enum { ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) }; +enum { + ETHTOOL_A_PHY_UNSPEC, + ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHY_INDEX, /* u32 */ + ETHTOOL_A_PHY_DRVNAME, /* string */ + ETHTOOL_A_PHY_NAME, /* string */ + ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ + + /* add new constants above here */ + __ETHTOOL_A_PHY_CNT, + ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From c579286a514d88a3f0d3bdabfd4d88737b33cb17 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Aug 2024 18:31:33 +0200 Subject: thermal: core: Drop unused bind/unbind functions and callbacks There are no more callers of thermal_zone_bind_cooling_device() and thermal_zone_unbind_cooling_device(), so drop them along with all of the corresponding headers, code and documentation. Moreover, because the .bind() and .unbind() thermal zone callbacks would only be used when the above functions, respectively, were called, drop them as well along with all of the code related to them. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Zhang Rui Acked-by: Huisong Li Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/4251116.1IzOArtZ34@rjwysocki.net --- include/linux/thermal.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6599a26847f7..25ea8fe2313e 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -92,10 +92,6 @@ struct cooling_spec { }; struct thermal_zone_device_ops { - int (*bind) (struct thermal_zone_device *, - struct thermal_cooling_device *); - int (*unbind) (struct thermal_zone_device *, - struct thermal_cooling_device *); bool (*should_bind) (struct thermal_zone_device *, const struct thermal_trip *, struct thermal_cooling_device *, @@ -247,12 +243,6 @@ const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); -int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, - struct thermal_cooling_device *, - unsigned long, unsigned long, - unsigned int); -int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, - struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, enum thermal_notify_event); -- cgit v1.2.3 From 4b570ac2eb54f66ff64f2864be6303b8d67cc7f9 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 22 Aug 2024 09:33:55 +0200 Subject: drm/rect: Add drm_rect_overlap() Check if two rectangles overlap. It's a bit similar to drm_rect_intersect() but this won't modify the rectangle. Simplifies a bit drm_panic. Signed-off-by: Jocelyn Falempe Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240822073852.562286-3-jfalempe@redhat.com --- include/drm/drm_rect.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/drm/drm_rect.h b/include/drm/drm_rect.h index 73fcb899a01d..46f09cf68458 100644 --- a/include/drm/drm_rect.h +++ b/include/drm/drm_rect.h @@ -238,6 +238,21 @@ static inline void drm_rect_fp_to_int(struct drm_rect *dst, drm_rect_height(src) >> 16); } +/** + * drm_rect_overlap - Check if two rectangles overlap + * @a: first rectangle + * @b: second rectangle + * + * RETURNS: + * %true if the rectangles overlap, %false otherwise. + */ +static inline bool drm_rect_overlap(const struct drm_rect *a, + const struct drm_rect *b) +{ + return (a->x2 > b->x1 && b->x2 > a->x1 && + a->y2 > b->y1 && b->y2 > a->y1); +} + bool drm_rect_intersect(struct drm_rect *r, const struct drm_rect *clip); bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, const struct drm_rect *clip); -- cgit v1.2.3 From 4ec2caab775606c4bf9d0f026f8942a0b33e2255 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 23 Aug 2024 00:41:25 +0300 Subject: media: v4l2-mc: Mark v4l2_pipeline_link_notify() as deprecated Commit b97213a41140 ("media: v4l2-mc: Make v4l2_pipeline_pm_{get,put} deprecated") marked the v4l2_pipeline_pm_get() and v4l2_pipeline_pm_put() functions as deprecated, but forgot to address the related v4l2_pipeline_link_notify() function similarly. Fix it. 
Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Link: https://lore.kernel.org/r/20240822214125.3161-1-laurent.pinchart+renesas@ideasonboard.com Signed-off-by: Laurent Pinchart --- include/media/v4l2-mc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-mc.h b/include/media/v4l2-mc.h index ed0a44b6eada..1837c9fd78cf 100644 --- a/include/media/v4l2-mc.h +++ b/include/media/v4l2-mc.h @@ -178,6 +178,9 @@ void v4l2_pipeline_pm_put(struct media_entity *entity); * @flags: New link flags that will be applied * @notification: The link's state change notification type (MEDIA_DEV_NOTIFY_*) * + * THIS FUNCTION IS DEPRECATED. DO NOT USE IN NEW DRIVERS. USE RUNTIME PM + * ON SUB-DEVICE DRIVERS INSTEAD. + * * React to link management on powered pipelines by updating the use count of * all entities in the source and sink sides of the link. Entities are powered * on or off accordingly. The use of this function should be paired -- cgit v1.2.3 From ccce71013406a0a3c81850dab940f07b112349d3 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 14 Aug 2024 14:21:18 +0200 Subject: mtd: rawnand: davinci: make platform_data private There are no longer any users of the platform data for davinci rawnand in board files. We can remove the public pdata headers and move the structures that are still used into the driver compilation unit while removing the rest. Signed-off-by: Bartosz Golaszewski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240814122120.13975-1-brgl@bgdev.pl --- include/linux/platform_data/mtd-davinci-aemif.h | 36 ---------- include/linux/platform_data/mtd-davinci.h | 88 ------------------------- 2 files changed, 124 deletions(-) delete mode 100644 include/linux/platform_data/mtd-davinci-aemif.h delete mode 100644 include/linux/platform_data/mtd-davinci.h (limited to 'include') diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h deleted file mode 100644 index a49826214a39..000000000000 --- a/include/linux/platform_data/mtd-davinci-aemif.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * TI DaVinci AEMIF support - * - * Copyright 2010 (C) Texas Instruments, Inc. https://www.ti.com/ - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ -#ifndef _MACH_DAVINCI_AEMIF_H -#define _MACH_DAVINCI_AEMIF_H - -#include - -#define NRCSR_OFFSET 0x00 -#define AWCCR_OFFSET 0x04 -#define A1CR_OFFSET 0x10 - -#define ACR_ASIZE_MASK 0x3 -#define ACR_EW_MASK BIT(30) -#define ACR_SS_MASK BIT(31) - -/* All timings in nanoseconds */ -struct davinci_aemif_timing { - u8 wsetup; - u8 wstrobe; - u8 whold; - - u8 rsetup; - u8 rstrobe; - u8 rhold; - - u8 ta; -}; - -#endif diff --git a/include/linux/platform_data/mtd-davinci.h b/include/linux/platform_data/mtd-davinci.h deleted file mode 100644 index dd474dd44848..000000000000 --- a/include/linux/platform_data/mtd-davinci.h +++ /dev/null @@ -1,88 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mach-davinci/nand.h - * - * Copyright © 2006 Texas Instruments. 
- * - * Ported to 2.6.23 Copyright © 2008 by - * Sander Huijsen - * Troy Kisky - * Dirk Behme - * - * -------------------------------------------------------------------------- - */ - -#ifndef __ARCH_ARM_DAVINCI_NAND_H -#define __ARCH_ARM_DAVINCI_NAND_H - -#include - -#define NANDFCR_OFFSET 0x60 -#define NANDFSR_OFFSET 0x64 -#define NANDF1ECC_OFFSET 0x70 - -/* 4-bit ECC syndrome registers */ -#define NAND_4BIT_ECC_LOAD_OFFSET 0xbc -#define NAND_4BIT_ECC1_OFFSET 0xc0 -#define NAND_4BIT_ECC2_OFFSET 0xc4 -#define NAND_4BIT_ECC3_OFFSET 0xc8 -#define NAND_4BIT_ECC4_OFFSET 0xcc -#define NAND_ERR_ADD1_OFFSET 0xd0 -#define NAND_ERR_ADD2_OFFSET 0xd4 -#define NAND_ERR_ERRVAL1_OFFSET 0xd8 -#define NAND_ERR_ERRVAL2_OFFSET 0xdc - -/* NOTE: boards don't need to use these address bits - * for ALE/CLE unless they support booting from NAND. - * They're used unless platform data overrides them. - */ -#define MASK_ALE 0x08 -#define MASK_CLE 0x10 - -struct davinci_nand_pdata { /* platform_data */ - uint32_t mask_ale; - uint32_t mask_cle; - - /* - * 0-indexed chip-select number of the asynchronous - * interface to which the NAND device has been connected. - * - * So, if you have NAND connected to CS3 of DA850, you - * will pass '1' here. Since the asynchronous interface - * on DA850 starts from CS2. - */ - uint32_t core_chipsel; - - /* for packages using two chipselects */ - uint32_t mask_chipsel; - - /* board's default static partition info */ - struct mtd_partition *parts; - unsigned nr_parts; - - /* none == NAND_ECC_ENGINE_TYPE_NONE (strongly *not* advised!!) - * soft == NAND_ECC_ENGINE_TYPE_SOFT - * else == NAND_ECC_ENGINE_TYPE_ON_HOST, according to ecc_bits - * - * All DaVinci-family chips support 1-bit hardware ECC. - * Newer ones also support 4-bit ECC, but are awkward - * using it with large page chips. - */ - enum nand_ecc_engine_type engine_type; - enum nand_ecc_placement ecc_placement; - u8 ecc_bits; - - /* e.g. NAND_BUSWIDTH_16 */ - unsigned options; - /* e.g. NAND_BBT_USE_FLASH */ - unsigned bbt_options; - - /* Main and mirror bbt descriptor overrides */ - struct nand_bbt_descr *bbt_td; - struct nand_bbt_descr *bbt_md; - - /* Access timings */ - struct davinci_aemif_timing *timing; -}; - -#endif /* __ARCH_ARM_DAVINCI_NAND_H */ -- cgit v1.2.3 From 8a48281cfa7f380707d42b649acda3ea36348697 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 23 Aug 2024 15:42:01 +0800 Subject: PCI: Make pci_bus_type constant Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the pci_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Link: https://lore.kernel.org/r/20240823074202.139265-1-kunwu.chan@linux.dev Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Signed-off-by: Bjorn Helgaas Cc: Greg Kroah-Hartman --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..197292b336a5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1098,7 +1098,7 @@ enum pcie_bus_config_types { extern enum pcie_bus_config_types pcie_bus_config; -extern struct bus_type pci_bus_type; +extern const struct bus_type pci_bus_type; /* Do NOT directly access these two variables, unless you are arch-specific PCI * code, or PCI core code. 
*/ -- cgit v1.2.3 From 9246b487ab3c3b5993aae7552b7a4c541cc14a49 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 23 Aug 2024 17:57:08 +0800 Subject: PCI: Add function 0 DMA alias quirk for Glenfly Arise chip Add DMA support for audio function of Glenfly Arise chip, which uses Requester ID of function 0. Link: https://lore.kernel.org/r/CA2BBD087345B6D1+20240823095708.3237375-1-wangyuli@uniontech.com Signed-off-by: SiyuLi Signed-off-by: WangYuli [bhelgaas: lower-case hex to match local code, drop unused Device IDs] Signed-off-by: Bjorn Helgaas Reviewed-by: Takashi Iwai --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e388c8b1cbc2..2c94d4004dd5 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2661,6 +2661,8 @@ #define PCI_DEVICE_ID_DCI_PCCOM8 0x0002 #define PCI_DEVICE_ID_DCI_PCCOM2 0x0004 +#define PCI_VENDOR_ID_GLENFLY 0x6766 + #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 #define PCI_DEVICE_ID_INTEL_HDA_CML_LP 0x02c8 -- cgit v1.2.3 From e5b9032b1b8e0ecc5b71451703f04b4e13ea42f8 Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Tue, 9 Jul 2024 12:22:46 +0200 Subject: dt-bindings: interconnect: qcom: Add Qualcomm MSM8976 NoC Add bindings for Qualcomm MSM8976 Network-On-Chip interconnect devices. Signed-off-by: Adam Skladowski Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240709102728.15349-2-a39.skl@gmail.com Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,msm8976.h | 97 +++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,msm8976.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,msm8976.h b/include/dt-bindings/interconnect/qcom,msm8976.h new file mode 100644 index 000000000000..4ea90f22320e --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,msm8976.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Qualcomm MSM8976 interconnect IDs + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8976_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8976_H + +/* BIMC fabric */ +#define MAS_APPS_PROC 0 +#define MAS_SMMNOC_BIMC 1 +#define MAS_SNOC_BIMC 2 +#define MAS_TCU_0 3 +#define SLV_EBI 4 +#define SLV_BIMC_SNOC 5 + +/* PCNOC fabric */ +#define MAS_USB_HS2 0 +#define MAS_BLSP_1 1 +#define MAS_USB_HS1 2 +#define MAS_BLSP_2 3 +#define MAS_CRYPTO 4 +#define MAS_SDCC_1 5 +#define MAS_SDCC_2 6 +#define MAS_SDCC_3 7 +#define MAS_SNOC_PCNOC 8 +#define MAS_LPASS_AHB 9 +#define MAS_SPDM 10 +#define MAS_DEHR 11 +#define MAS_XM_USB_HS1 12 +#define PCNOC_M_0 13 +#define PCNOC_M_1 14 +#define PCNOC_INT_0 15 +#define PCNOC_INT_1 16 +#define PCNOC_INT_2 17 +#define PCNOC_S_1 18 +#define PCNOC_S_2 19 +#define PCNOC_S_3 20 +#define PCNOC_S_4 21 +#define PCNOC_S_8 22 +#define PCNOC_S_9 23 +#define SLV_TCSR 24 +#define SLV_TLMM 25 +#define SLV_CRYPTO_0_CFG 26 +#define SLV_MESSAGE_RAM 27 +#define SLV_PDM 28 +#define SLV_PRNG 29 +#define SLV_PMIC_ARB 30 +#define SLV_SNOC_CFG 31 +#define SLV_DCC_CFG 32 +#define SLV_CAMERA_SS_CFG 33 +#define SLV_DISP_SS_CFG 34 +#define SLV_VENUS_CFG 35 +#define SLV_SDCC_1 36 +#define SLV_BLSP_1 37 +#define SLV_USB_HS 38 +#define SLV_SDCC_3 39 +#define SLV_SDCC_2 40 +#define SLV_GPU_CFG 41 +#define SLV_USB_HS2 42 +#define SLV_BLSP_2 43 +#define SLV_PCNOC_SNOC 44 + +/* SNOC fabric */ +#define MAS_QDSS_BAM 0 +#define MAS_BIMC_SNOC 1 +#define MAS_PCNOC_SNOC 2 
+#define MAS_QDSS_ETR 3 +#define MAS_LPASS_PROC 4 +#define MAS_IPA 5 +#define QDSS_INT 6 +#define SNOC_INT_0 7 +#define SNOC_INT_1 8 +#define SNOC_INT_2 9 +#define SLV_KPSS_AHB 10 +#define SLV_SNOC_BIMC 11 +#define SLV_IMEM 12 +#define SLV_SNOC_PCNOC 13 +#define SLV_QDSS_STM 14 +#define SLV_CATS_0 15 +#define SLV_CATS_1 16 +#define SLV_LPASS 17 + +/* SNOC-MM fabric */ +#define MAS_JPEG 0 +#define MAS_OXILI 1 +#define MAS_MDP0 2 +#define MAS_MDP1 3 +#define MAS_VENUS_0 4 +#define MAS_VENUS_1 5 +#define MAS_VFE_0 6 +#define MAS_VFE_1 7 +#define MAS_CPP 8 +#define MM_INT_0 9 +#define SLV_SMMNOC_BIMC 10 + +#endif /* __DT_BINDINGS_INTERCONNECT_QCOM_MSM8976_H */ -- cgit v1.2.3 From 4937dc0ffc8edc774b21882d557c032f3693f40d Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Tue, 9 Jul 2024 12:22:48 +0200 Subject: dt-bindings: interconnect: qcom: Add Qualcomm MSM8937 NoC Add bindings for Qualcomm MSM8937 Network-On-Chip interconnect devices. Signed-off-by: Adam Skladowski Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240709102728.15349-4-a39.skl@gmail.com Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,msm8937.h | 93 +++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,msm8937.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,msm8937.h b/include/dt-bindings/interconnect/qcom,msm8937.h new file mode 100644 index 000000000000..98b8a4637aab --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,msm8937.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Qualcomm MSM8937 interconnect IDs + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8937_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8937_H + +/* BIMC fabric */ +#define MAS_APPS_PROC 0 +#define MAS_OXILI 1 +#define MAS_SNOC_BIMC_0 2 +#define MAS_SNOC_BIMC_2 3 +#define MAS_SNOC_BIMC_1 4 +#define MAS_TCU_0 5 +#define SLV_EBI 6 +#define SLV_BIMC_SNOC 7 + +/* PCNOC fabric */ +#define MAS_SPDM 0 +#define MAS_BLSP_1 1 +#define MAS_BLSP_2 2 +#define MAS_USB_HS1 3 +#define MAS_XI_USB_HS1 4 +#define MAS_CRYPTO 5 +#define MAS_SDCC_1 6 +#define MAS_SDCC_2 7 +#define MAS_SNOC_PCNOC 8 +#define PCNOC_M_0 9 +#define PCNOC_M_1 10 +#define PCNOC_INT_0 11 +#define PCNOC_INT_1 12 +#define PCNOC_INT_2 13 +#define PCNOC_INT_3 14 +#define PCNOC_S_0 15 +#define PCNOC_S_1 16 +#define PCNOC_S_2 17 +#define PCNOC_S_3 18 +#define PCNOC_S_4 19 +#define PCNOC_S_6 20 +#define PCNOC_S_7 21 +#define PCNOC_S_8 22 +#define SLV_SDCC_2 23 +#define SLV_SPDM 24 +#define SLV_PDM 25 +#define SLV_PRNG 26 +#define SLV_TCSR 27 +#define SLV_SNOC_CFG 28 +#define SLV_MESSAGE_RAM 29 +#define SLV_CAMERA_SS_CFG 30 +#define SLV_DISP_SS_CFG 31 +#define SLV_VENUS_CFG 32 +#define SLV_GPU_CFG 33 +#define SLV_TLMM 34 +#define SLV_BLSP_1 35 +#define SLV_BLSP_2 36 +#define SLV_PMIC_ARB 37 +#define SLV_SDCC_1 38 +#define SLV_CRYPTO_0_CFG 39 +#define SLV_USB_HS 40 +#define SLV_TCU 41 +#define SLV_PCNOC_SNOC 42 + +/* SNOC fabric */ +#define MAS_QDSS_BAM 0 +#define MAS_BIMC_SNOC 1 +#define MAS_PCNOC_SNOC 2 +#define MAS_QDSS_ETR 3 +#define QDSS_INT 4 +#define SNOC_INT_0 5 +#define SNOC_INT_1 6 +#define SNOC_INT_2 7 +#define SLV_KPSS_AHB 8 +#define SLV_WCSS 9 +#define SLV_SNOC_BIMC_1 10 +#define SLV_IMEM 11 +#define SLV_SNOC_PCNOC 12 +#define SLV_QDSS_STM 13 +#define SLV_CATS_1 14 +#define SLV_LPASS 15 + +/* SNOC-MM fabric */ +#define MAS_JPEG 0 +#define MAS_MDP 1 +#define MAS_VENUS 2 +#define MAS_VFE0 3 +#define MAS_VFE1 4 +#define 
MAS_CPP 5 +#define SLV_SNOC_BIMC_0 6 +#define SLV_SNOC_BIMC_2 7 +#define SLV_CATS_0 8 + +#endif /* __DT_BINDINGS_INTERCONNECT_QCOM_MSM8937_H */ -- cgit v1.2.3 From d59232afb0344e33e9399f308d9b4a03876e7676 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 13 Aug 2024 21:24:22 +0000 Subject: bpf: Rename ARG_PTR_TO_KPTR -> ARG_KPTR_XCHG_DEST ARG_PTR_TO_KPTR is currently only used by the bpf_kptr_xchg helper. Although it limits reg types for that helper's first arg to PTR_TO_MAP_VALUE, any arbitrary mapval won't do: further custom verification logic ensures that the mapval reg being xchgd-into is pointing to a kptr field. If this is not the case, it's not safe to xchg into that reg's pointee. Let's rename the bpf_arg_type to more accurately describe the fairly specific expectations that this arg type encodes. This is a nonfunctional change. Acked-by: Martin KaFai Lau Signed-off-by: Dave Marchevsky Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20240813212424.2871455-4-amery.hung@bytedance.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 00dc4dd28cbd..dc63083f76b7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -744,7 +744,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ - ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */ ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, -- cgit v1.2.3 From b0966c724584a5a9fd7fb529de19807c31f27a45 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 13 Aug 2024 21:24:23 +0000 Subject: bpf: Support bpf_kptr_xchg into local kptr Currently, users can only stash kptr into map values with bpf_kptr_xchg(). This patch further supports stashing kptr into local kptr by adding local kptr as a valid destination type. When stashing into local kptr, btf_record in program BTF is used instead of btf_record in map to search for the btf_field of the local kptr. The local kptr specific checks in check_reg_type() only apply when the source argument of bpf_kptr_xchg() is local kptr. Therefore, we make the scope of the check explicit as the destination now can also be local kptr. Acked-by: Martin KaFai Lau Signed-off-by: Dave Marchevsky Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20240813212424.2871455-5-amery.hung@bytedance.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 35bcf52dbc65..e2629457d72d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5519,11 +5519,12 @@ union bpf_attr { * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. * - * void *bpf_kptr_xchg(void *map_value, void *ptr) + * void *bpf_kptr_xchg(void *dst, void *ptr) * Description - * Exchange kptr at pointer *map_value* with *ptr*, and return the - * old value. *ptr* can be NULL, otherwise it must be a referenced - * pointer which will be released when this helper is called. + * Exchange kptr at pointer *dst* with *ptr*, and return the old value. + * *dst* can be map value or local kptr. 
*ptr* can be NULL, otherwise + * it must be a referenced pointer which will be released when this helper + * is called. * Return * The old value of kptr (which can be NULL). The returned pointer * if not NULL, is a reference which must be released using its -- cgit v1.2.3 From 9e83dd3ebb14fadccb936308b7b101c75da76324 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Aug 2024 18:39:34 +0800 Subject: irqchip/loongson-eiointc: Rename CPUHP_AP_IRQ_LOONGARCH_STARTING Rename CPUHP_AP_IRQ_LOONGARCH_STARTING to CPUHP_AP_IRQ_EIOINTC_STARTING because the upcoming AVECINTC irqchip driver will introduce a new state and so both are clearly identifiable. Signed-off-by: Huacai Chen Signed-off-by: Tianyang Zhang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240823103936.25092-3-zhangtianyang@loongson.cn --- include/linux/cpuhotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 51ba681b915a..e49807f7805c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -145,7 +145,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, - CPUHP_AP_IRQ_LOONGARCH_STARTING, + CPUHP_AP_IRQ_EIOINTC_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, -- cgit v1.2.3 From ae16f05c928a1336d5d9d19fd805d7bf29c3f0c8 Mon Sep 17 00:00:00 2001 From: Tianyang Zhang Date: Fri, 23 Aug 2024 18:43:37 +0800 Subject: irqchip/loongarch-avec: Add AVEC irqchip support Introduce the advanced extended interrupt controllers (AVECINTC). This feature will allow each core to have 256 independent interrupt vectors and MSI interrupts can be independently routed to any vector on any CPU. The whole topology of irqchips in LoongArch machines looks like this if AVECINTC is supported: +-----+ +-----------------------+ +-------+ | IPI | --> | CPUINTC | <-- | Timer | +-----+ +-----------------------+ +-------+ ^ ^ ^ | | | +---------+ +----------+ +---------+ +-------+ | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs | +---------+ +----------+ +---------+ +-------+ ^ ^ | | +---------+ +---------+ | PCH-PIC | | PCH-MSI | +---------+ +---------+ ^ ^ ^ | | | +---------+ +---------+ +---------+ | Devices | | PCH-LPC | | Devices | +---------+ +---------+ +---------+ ^ | +---------+ | Devices | +---------+ Co-developed-by: Jianmin Lv Signed-off-by: Jianmin Lv Co-developed-by: Liupu Wang Signed-off-by: Liupu Wang Co-developed-by: Huacai Chen Signed-off-by: Huacai Chen Signed-off-by: Tianyang Zhang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240823104337.25577-2-zhangtianyang@loongson.cn --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e49807f7805c..55a726d317d4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -146,6 +146,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, CPUHP_AP_IRQ_EIOINTC_STARTING, + CPUHP_AP_IRQ_AVECINTC_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, -- cgit v1.2.3 From 17e28a9aeae40d2de3c1ea3b94819ed94bfd6392 Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Thu, 22 Aug 2024 15:31:58 +0300 Subject: genirq: Fix typo in struct comment Remove redundant "e" in "assign(e)ments". 
Signed-off-by: Costa Shulyupin Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240822123205.2186221-1-costa.shul@redhat.com --- include/linux/interrupt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 694de61e0b38..457151f9f263 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -276,7 +276,7 @@ struct irq_affinity_notify { #define IRQ_AFFINITY_MAX_SETS 4 /** - * struct irq_affinity - Description for automatic irq affinity assignements + * struct irq_affinity - Description for automatic irq affinity assignments * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of * the MSI(-X) vector space * @post_vectors: Don't apply affinity to @post_vectors at end of -- cgit v1.2.3 From b8ca67b1143e4d1e2adc20790c2c46e1498f54be Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 4 Aug 2024 08:40:10 +0300 Subject: dt-bindings: interconnect: qcom,sm8350: drop DISP nodes Vendor msm-5.x kernels declared duplicate indices for some of display nodes to be used by separate display RSC and BCM voters. As it is not clear how this separate BCM should be modelled upstream and the device trees do not use these indices, drop them for now. Signed-off-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240804-sm8350-fixes-v1-6-1149dd8399fe@linaro.org Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,sm8350.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,sm8350.h b/include/dt-bindings/interconnect/qcom,sm8350.h index c7f7ed315aeb..2282f93607bc 100644 --- a/include/dt-bindings/interconnect/qcom,sm8350.h +++ b/include/dt-bindings/interconnect/qcom,sm8350.h @@ -119,9 +119,6 @@ #define SLAVE_SERVICE_GEM_NOC_1 16 #define SLAVE_SERVICE_GEM_NOC_2 17 #define SLAVE_SERVICE_GEM_NOC 18 -#define MASTER_MNOC_HF_MEM_NOC_DISP 19 -#define MASTER_MNOC_SF_MEM_NOC_DISP 20 -#define SLAVE_LLCC_DISP 21 #define MASTER_CNOC_LPASS_AG_NOC 0 #define SLAVE_LPASS_CORE_CFG 1 @@ -133,8 +130,6 @@ #define MASTER_LLCC 0 #define SLAVE_EBI1 1 -#define MASTER_LLCC_DISP 2 -#define SLAVE_EBI1_DISP 3 #define MASTER_CAMNOC_HF 0 #define MASTER_CAMNOC_ICP 1 @@ -149,11 +144,6 @@ #define SLAVE_MNOC_HF_MEM_NOC 10 #define SLAVE_MNOC_SF_MEM_NOC 11 #define SLAVE_SERVICE_MNOC 12 -#define MASTER_MDP0_DISP 13 -#define MASTER_MDP1_DISP 14 -#define MASTER_ROTATOR_DISP 15 -#define SLAVE_MNOC_HF_MEM_NOC_DISP 16 -#define SLAVE_MNOC_SF_MEM_NOC_DISP 17 #define MASTER_CDSP_NOC_CFG 0 #define MASTER_CDSP_PROC 1 -- cgit v1.2.3 From 65ab5ac4df012388481d0414fcac1d5ac1721fb3 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Fri, 23 Aug 2024 12:51:00 -0700 Subject: bpf: Add bpf_copy_from_user_str kfunc This adds a kfunc wrapper around strncpy_from_user, which can be called from sleepable BPF programs. This matches the non-sleepable 'bpf_probe_read_user_str' helper except it includes an additional 'flags' param, which allows consumers to clear the entire destination buffer on success or failure. 
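As a rough usage sketch (not part of this patch), a sleepable program could call the new kfunc as below. The kfunc prototype, the section name and the attach target are assumptions made for illustration only:

  /* Sketch only: prototype and attach target are assumed. */
  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  extern int bpf_copy_from_user_str(void *dst, u32 dst__sz,
                                    const void *unsafe_ptr__ign,
                                    u64 flags) __ksym;

  char LICENSE[] SEC("license") = "GPL";

  /* Hypothetical sleepable uprobe on a user function whose first
   * argument is a user-space string pointer. */
  SEC("uprobe.s//usr/bin/app:handle_name")
  int BPF_UPROBE(trace_handle_name, const char *user_str)
  {
          char buf[64];
          long n;

          /* With BPF_F_PAD_ZEROS the remainder of 'buf' is zeroed, so
           * the whole buffer is defined on success and on failure. */
          n = bpf_copy_from_user_str(buf, sizeof(buf), user_str,
                                     BPF_F_PAD_ZEROS);
          if (n > 0)
                  bpf_printk("copied %ld bytes: %s", n, buf);
          return 0;
  }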
Signed-off-by: Jordan Rome Link: https://lore.kernel.org/r/20240823195101.3621028-1-linux@jordanrome.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e2629457d72d..c3a5728db115 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7513,4 +7513,13 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +/* + * Flags to control BPF kfunc behaviour. + * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective + * helper documentation for details.) + */ +enum bpf_kfunc_flags { + BPF_F_PAD_ZEROS = (1ULL << 0), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From a54ad727f74559f7c3dfcfd2a63d0ce7683a82e8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Aug 2024 15:04:32 +0200 Subject: xfrm: policy: remove remaining use of inexact list No consumers anymore, remove it. After this, insertion of policies no longer require list walk of all inexact policies but only those that are reachable via the candidate sets. This gives almost linear insertion speeds provided the inserted policies are for non-overlapping networks. Before: Inserted 1000 policies in 70 ms Inserted 10000 policies in 1155 ms Inserted 100000 policies in 216848 ms After: Inserted 1000 policies in 56 ms Inserted 10000 policies in 478 ms Inserted 100000 policies in 4580 ms Insertion of 1m entries takes about ~40s after this change on my test vm. Cc: Noel Kuntze Cc: Tobias Brunner Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f7244ac4fa08..1fa2da22a49e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -555,7 +555,6 @@ struct xfrm_policy { u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; - struct hlist_node bydst_inexact_list; struct rcu_head rcu; struct xfrm_dev_offload xdo; -- cgit v1.2.3 From 3c44d31cb34ce4eb8311a2e73634d57702948230 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 17 Aug 2024 14:58:35 +0800 Subject: crypto: simd - Do not call crypto_alloc_tfm during registration Algorithm registration is usually carried out during module init, where as little work as possible should be carried out. The SIMD code violated this rule by allocating a tfm, this then triggers a full test of the algorithm which may dead-lock in certain cases. SIMD is only allocating the tfm to get at the alg object, which is in fact already available as it is what we are registering. Use that directly and remove the crypto_alloc_tfm call. Also remove some obsolete and unused SIMD API. 
Signed-off-by: Herbert Xu --- include/crypto/internal/simd.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index d2316242a988..be97b97a75dd 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -14,11 +14,10 @@ struct simd_skcipher_alg; struct skcipher_alg; -struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname, +struct simd_skcipher_alg *simd_skcipher_create_compat(struct skcipher_alg *ialg, + const char *algname, const char *drvname, const char *basename); -struct simd_skcipher_alg *simd_skcipher_create(const char *algname, - const char *basename); void simd_skcipher_free(struct simd_skcipher_alg *alg); int simd_register_skciphers_compat(struct skcipher_alg *algs, int count, @@ -32,13 +31,6 @@ void simd_unregister_skciphers(struct skcipher_alg *algs, int count, struct simd_aead_alg; struct aead_alg; -struct simd_aead_alg *simd_aead_create_compat(const char *algname, - const char *drvname, - const char *basename); -struct simd_aead_alg *simd_aead_create(const char *algname, - const char *basename); -void simd_aead_free(struct simd_aead_alg *alg); - int simd_register_aeads_compat(struct aead_alg *algs, int count, struct simd_aead_alg **simd_algs); -- cgit v1.2.3 From 7d3aed652d090508990d245f9d80dcc481910d02 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 22 Aug 2024 05:51:54 +0000 Subject: net: refactor ->ndo_bpf calls into dev_xdp_propagate When net devices propagate xdp configurations to slave devices, we will need to perform a memory provider check to ensure we're not binding xdp to a device using unreadable netmem. Currently the ->ndo_bpf calls in a few places. Adding checks to all these places would not be ideal. Refactor all the ->ndo_bpf calls into one place where we can add this check in the future. Suggested-by: Jakub Kicinski Signed-off-by: Mina Almasry Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 289a6133769c..7dfa836d9dbf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3925,6 +3925,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); +int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From d29cb3726f03cdac7889f0109a7cb84f79e168a8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 7 Aug 2024 15:18:13 +0100 Subject: io_uring: add absolute mode wait timeouts In addition to current relative timeouts for the waiting loop, where the timespec argument specifies the maximum time it can wait for, add support for the absolute mode, with the value carrying a CLOCK_MONOTONIC absolute time until which we should return control back to the user. 
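A minimal userspace sketch of the intended use (raw syscall, error handling omitted; the extended-argument layout is the pre-existing one, only the new IORING_ENTER_ABS_TIMER flag comes from this patch):

  #include <linux/io_uring.h>
  #include <linux/time_types.h>
  #include <signal.h>
  #include <stdint.h>
  #include <sys/syscall.h>
  #include <time.h>
  #include <unistd.h>

  /* Wait for 'want' completions, giving up at the absolute
   * CLOCK_MONOTONIC time 'deadline'. */
  static int wait_until(int ring_fd, unsigned int want,
                        const struct __kernel_timespec *deadline)
  {
          struct io_uring_getevents_arg arg = {
                  .sigmask_sz = _NSIG / 8,
                  .ts = (uint64_t)(uintptr_t)deadline,
          };

          return syscall(__NR_io_uring_enter, ring_fd, 0, want,
                         IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG |
                         IORING_ENTER_ABS_TIMER,
                         &arg, sizeof(arg));
  }

  /* Example: deadline 100ms from now. */
  static int wait_100ms(int ring_fd, unsigned int want)
  {
          struct timespec now;
          struct __kernel_timespec deadline;

          clock_gettime(CLOCK_MONOTONIC, &now);
          deadline.tv_sec = now.tv_sec;
          deadline.tv_nsec = now.tv_nsec + 100 * 1000 * 1000;
          if (deadline.tv_nsec >= 1000000000) {
                  deadline.tv_sec++;
                  deadline.tv_nsec -= 1000000000;
          }
          return wait_until(ring_fd, want, &deadline);
  }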
Suggested-by: Lewis Baker Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4d5b74d67ada882590b2e42aa3aa7117bbf6b55f.1723039801.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index adc2524fd8e3..6a81f55fcd0d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -507,6 +507,7 @@ struct io_cqring_offsets { #define IORING_ENTER_SQ_WAIT (1U << 2) #define IORING_ENTER_EXT_ARG (1U << 3) #define IORING_ENTER_REGISTERED_RING (1U << 4) +#define IORING_ENTER_ABS_TIMER (1U << 5) /* * Passed in for io_uring_setup(2). Copied back with updated info on success -- cgit v1.2.3 From 2b8e976b984278edbeab3251d370e76d237699f9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 7 Aug 2024 15:18:14 +0100 Subject: io_uring: user registered clockid for wait timeouts Add a new registration opcode IORING_REGISTER_CLOCK, which allows the user to select which clock id it wants to use with CQ waiting timeouts. It only allows a subset of all posix clocks and currently supports CLOCK_MONOTONIC and CLOCK_BOOTTIME. Suggested-by: Lewis Baker Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/98f2bc8a3c36cdf8f0e6a275245e81e903459703.1723039801.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +++ include/uapi/linux/io_uring.h | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3315005df117..4b9ba523978d 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -239,6 +239,9 @@ struct io_ring_ctx { struct io_rings *rings; struct percpu_ref refs; + clockid_t clockid; + enum tk_offsets clock_offset; + enum task_work_notify_mode notify_method; unsigned sq_thread_idle; } ____cacheline_aligned_in_smp; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6a81f55fcd0d..7af716136df9 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -596,6 +596,8 @@ enum io_uring_register_op { IORING_REGISTER_NAPI = 27, IORING_UNREGISTER_NAPI = 28, + IORING_REGISTER_CLOCK = 29, + /* this goes last */ IORING_REGISTER_LAST, @@ -676,6 +678,11 @@ struct io_uring_restriction { __u32 resv2[3]; }; +struct io_uring_clock_register { + __u32 clockid; + __u32 __resv[3]; +}; + struct io_uring_buf { __u64 addr; __u32 len; -- cgit v1.2.3 From 7ed9e09e2d13d5d43385153bba4734cb0eafd7fd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Jan 2024 10:46:30 -0700 Subject: io_uring: wire up min batch wake timeout Expose min_wait_usec in io_uring_getevents_arg, replacing the pad member that is currently in there. The value is in usecs, which is explained in the name as well. Note that if min_wait_usec and a normal timeout is used in conjunction, the normal timeout is still relative to the base time. For example, if min_wait_usec is set to 100 and the normal timeout is 1000, the max total time waited is still 1000. This also means that if the normal timeout is shorter than min_wait_usec, then only the min_wait_usec will take effect. See previous commit for an explanation of how this works. IORING_FEAT_MIN_TIMEOUT is added as a feature flag for this, as applications doing submit_and_wait_timeout() style operations will generally not see the -EINVAL from the wait side as they return the number of IOs submitted. 
Only if no IOs are submitted will the -EINVAL bubble back up to the application. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 7af716136df9..042eab793e26 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -543,6 +543,7 @@ struct io_uring_params { #define IORING_FEAT_LINKED_FILE (1U << 12) #define IORING_FEAT_REG_REG_RING (1U << 13) #define IORING_FEAT_RECVSEND_BUNDLE (1U << 14) +#define IORING_FEAT_MIN_TIMEOUT (1U << 15) /* * io_uring_register(2) opcodes and arguments @@ -766,7 +767,7 @@ enum io_uring_register_restriction_op { struct io_uring_getevents_arg { __u64 sigmask; __u32 sigmask_sz; - __u32 pad; + __u32 min_wait_usec; __u64 ts; }; -- cgit v1.2.3 From 6729c73103bd7a0e60b0c980b51b5434010b4502 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 23 Aug 2024 17:11:09 +0300 Subject: drm/ttm: fix kernel-doc typo for @trylock_only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s/tryock_only/trylock_only/ Fixes: da966b82bf3d ("drm/ttm: Provide a generic LRU walker helper") Cc: Thomas Hellström Cc: Christian König Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240823141110.3431423-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/ttm/ttm_bo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index d1a732d56259..7294dde240fb 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -222,7 +222,7 @@ struct ttm_lru_walk { struct ttm_operation_ctx *ctx; /** @ticket: The struct ww_acquire_ctx if any. */ struct ww_acquire_ctx *ticket; - /** @tryock_only: Only use trylock for locking. */ + /** @trylock_only: Only use trylock for locking. */ bool trylock_only; }; -- cgit v1.2.3 From 64aa494de6fa7289ba9d1de4ba331a1aa8ae094f Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Fri, 2 Aug 2024 08:35:16 +0800 Subject: dt-bindings: pinctrl: Add pinctrl for Sophgo CV1800 series SoC. Add pinctrl support for Sophgo CV1800 series SoC. 
Signed-off-by: Inochi Amaoto Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/IA1PR20MB4953680DE7977CAD906DBDB4BBB32@IA1PR20MB4953.namprd20.prod.outlook.com Signed-off-by: Linus Walleij --- include/dt-bindings/pinctrl/pinctrl-cv1800b.h | 63 +++++++++++++ include/dt-bindings/pinctrl/pinctrl-cv1812h.h | 127 ++++++++++++++++++++++++++ include/dt-bindings/pinctrl/pinctrl-cv18xx.h | 19 ++++ include/dt-bindings/pinctrl/pinctrl-sg2000.h | 127 ++++++++++++++++++++++++++ include/dt-bindings/pinctrl/pinctrl-sg2002.h | 79 ++++++++++++++++ 5 files changed, 415 insertions(+) create mode 100644 include/dt-bindings/pinctrl/pinctrl-cv1800b.h create mode 100644 include/dt-bindings/pinctrl/pinctrl-cv1812h.h create mode 100644 include/dt-bindings/pinctrl/pinctrl-cv18xx.h create mode 100644 include/dt-bindings/pinctrl/pinctrl-sg2000.h create mode 100644 include/dt-bindings/pinctrl/pinctrl-sg2002.h (limited to 'include') diff --git a/include/dt-bindings/pinctrl/pinctrl-cv1800b.h b/include/dt-bindings/pinctrl/pinctrl-cv1800b.h new file mode 100644 index 000000000000..0593fc33d470 --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-cv1800b.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + * + * This file is generated from vendor pinout definition. + */ + +#ifndef _DT_BINDINGS_PINCTRL_CV1800B_H +#define _DT_BINDINGS_PINCTRL_CV1800B_H + +#include + +#define PIN_AUD_AOUTR 1 +#define PIN_SD0_CLK 3 +#define PIN_SD0_CMD 4 +#define PIN_SD0_D0 5 +#define PIN_SD0_D1 7 +#define PIN_SD0_D2 8 +#define PIN_SD0_D3 9 +#define PIN_SD0_CD 11 +#define PIN_SD0_PWR_EN 12 +#define PIN_SPK_EN 14 +#define PIN_UART0_TX 15 +#define PIN_UART0_RX 16 +#define PIN_SPINOR_HOLD_X 17 +#define PIN_SPINOR_SCK 18 +#define PIN_SPINOR_MOSI 19 +#define PIN_SPINOR_WP_X 20 +#define PIN_SPINOR_MISO 21 +#define PIN_SPINOR_CS_X 22 +#define PIN_IIC0_SCL 23 +#define PIN_IIC0_SDA 24 +#define PIN_AUX0 25 +#define PIN_PWR_VBAT_DET 30 +#define PIN_PWR_SEQ2 31 +#define PIN_XTAL_XIN 33 +#define PIN_SD1_GPIO0 35 +#define PIN_SD1_GPIO1 36 +#define PIN_SD1_D3 38 +#define PIN_SD1_D2 39 +#define PIN_SD1_D1 40 +#define PIN_SD1_D0 41 +#define PIN_SD1_CMD 42 +#define PIN_SD1_CLK 43 +#define PIN_ADC1 44 +#define PIN_USB_VBUS_DET 45 +#define PIN_ETH_TXP 47 +#define PIN_ETH_TXM 48 +#define PIN_ETH_RXP 49 +#define PIN_ETH_RXM 50 +#define PIN_MIPIRX4N 56 +#define PIN_MIPIRX4P 57 +#define PIN_MIPIRX3N 58 +#define PIN_MIPIRX3P 59 +#define PIN_MIPIRX2N 60 +#define PIN_MIPIRX2P 61 +#define PIN_MIPIRX1N 62 +#define PIN_MIPIRX1P 63 +#define PIN_MIPIRX0N 64 +#define PIN_MIPIRX0P 65 +#define PIN_AUD_AINL_MIC 67 + +#endif /* _DT_BINDINGS_PINCTRL_CV1800B_H */ diff --git a/include/dt-bindings/pinctrl/pinctrl-cv1812h.h b/include/dt-bindings/pinctrl/pinctrl-cv1812h.h new file mode 100644 index 000000000000..2908de347919 --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-cv1812h.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + * + * This file is generated from vendor pinout definition. 
+ */ + +#ifndef _DT_BINDINGS_PINCTRL_CV1812H_H +#define _DT_BINDINGS_PINCTRL_CV1812H_H + +#include + +#define PINPOS(row, col) \ + ((((row) - 'A' + 1) << 8) + ((col) - 1)) + +#define PIN_MIPI_TXM4 PINPOS('A', 2) +#define PIN_MIPIRX0N PINPOS('A', 4) +#define PIN_MIPIRX3P PINPOS('A', 6) +#define PIN_MIPIRX4P PINPOS('A', 7) +#define PIN_VIVO_D2 PINPOS('A', 9) +#define PIN_VIVO_D3 PINPOS('A', 10) +#define PIN_VIVO_D10 PINPOS('A', 12) +#define PIN_USB_VBUS_DET PINPOS('A', 13) +#define PIN_MIPI_TXP3 PINPOS('B', 1) +#define PIN_MIPI_TXM3 PINPOS('B', 2) +#define PIN_MIPI_TXP4 PINPOS('B', 3) +#define PIN_MIPIRX0P PINPOS('B', 4) +#define PIN_MIPIRX1N PINPOS('B', 5) +#define PIN_MIPIRX2N PINPOS('B', 6) +#define PIN_MIPIRX4N PINPOS('B', 7) +#define PIN_MIPIRX5N PINPOS('B', 8) +#define PIN_VIVO_D1 PINPOS('B', 9) +#define PIN_VIVO_D5 PINPOS('B', 10) +#define PIN_VIVO_D7 PINPOS('B', 11) +#define PIN_VIVO_D9 PINPOS('B', 12) +#define PIN_USB_ID PINPOS('B', 13) +#define PIN_ETH_RXM PINPOS('B', 15) +#define PIN_MIPI_TXP2 PINPOS('C', 1) +#define PIN_MIPI_TXM2 PINPOS('C', 2) +#define PIN_CAM_PD0 PINPOS('C', 3) +#define PIN_CAM_MCLK0 PINPOS('C', 4) +#define PIN_MIPIRX1P PINPOS('C', 5) +#define PIN_MIPIRX2P PINPOS('C', 6) +#define PIN_MIPIRX3N PINPOS('C', 7) +#define PIN_MIPIRX5P PINPOS('C', 8) +#define PIN_VIVO_CLK PINPOS('C', 9) +#define PIN_VIVO_D6 PINPOS('C', 10) +#define PIN_VIVO_D8 PINPOS('C', 11) +#define PIN_USB_VBUS_EN PINPOS('C', 12) +#define PIN_ETH_RXP PINPOS('C', 14) +#define PIN_GPIO_RTX PINPOS('C', 15) +#define PIN_MIPI_TXP1 PINPOS('D', 1) +#define PIN_MIPI_TXM1 PINPOS('D', 2) +#define PIN_CAM_MCLK1 PINPOS('D', 3) +#define PIN_IIC3_SCL PINPOS('D', 4) +#define PIN_VIVO_D4 PINPOS('D', 10) +#define PIN_ETH_TXM PINPOS('D', 14) +#define PIN_ETH_TXP PINPOS('D', 15) +#define PIN_MIPI_TXP0 PINPOS('E', 1) +#define PIN_MIPI_TXM0 PINPOS('E', 2) +#define PIN_CAM_PD1 PINPOS('E', 4) +#define PIN_CAM_RST0 PINPOS('E', 5) +#define PIN_VIVO_D0 PINPOS('E', 10) +#define PIN_ADC1 PINPOS('E', 13) +#define PIN_ADC2 PINPOS('E', 14) +#define PIN_ADC3 PINPOS('E', 15) +#define PIN_AUD_AOUTL PINPOS('F', 2) +#define PIN_IIC3_SDA PINPOS('F', 4) +#define PIN_SD1_D2 PINPOS('F', 14) +#define PIN_AUD_AOUTR PINPOS('G', 2) +#define PIN_SD1_D3 PINPOS('G', 13) +#define PIN_SD1_CLK PINPOS('G', 14) +#define PIN_SD1_CMD PINPOS('G', 15) +#define PIN_AUD_AINL_MIC PINPOS('H', 1) +#define PIN_RSTN PINPOS('H', 12) +#define PIN_PWM0_BUCK PINPOS('H', 13) +#define PIN_SD1_D1 PINPOS('H', 14) +#define PIN_SD1_D0 PINPOS('H', 15) +#define PIN_AUD_AINR_MIC PINPOS('J', 1) +#define PIN_IIC2_SCL PINPOS('J', 13) +#define PIN_IIC2_SDA PINPOS('J', 14) +#define PIN_SD0_CD PINPOS('K', 2) +#define PIN_SD0_D1 PINPOS('K', 3) +#define PIN_UART2_RX PINPOS('K', 13) +#define PIN_UART2_CTS PINPOS('K', 14) +#define PIN_UART2_TX PINPOS('K', 15) +#define PIN_SD0_CLK PINPOS('L', 1) +#define PIN_SD0_D0 PINPOS('L', 2) +#define PIN_SD0_CMD PINPOS('L', 3) +#define PIN_CLK32K PINPOS('L', 14) +#define PIN_UART2_RTS PINPOS('L', 15) +#define PIN_SD0_D3 PINPOS('M', 1) +#define PIN_SD0_D2 PINPOS('M', 2) +#define PIN_UART0_RX PINPOS('M', 4) +#define PIN_UART0_TX PINPOS('M', 5) +#define PIN_JTAG_CPU_TRST PINPOS('M', 6) +#define PIN_PWR_ON PINPOS('M', 11) +#define PIN_PWR_GPIO2 PINPOS('M', 12) +#define PIN_PWR_GPIO0 PINPOS('M', 13) +#define PIN_CLK25M PINPOS('M', 14) +#define PIN_SD0_PWR_EN PINPOS('N', 1) +#define PIN_SPK_EN PINPOS('N', 3) +#define PIN_JTAG_CPU_TCK PINPOS('N', 4) +#define PIN_JTAG_CPU_TMS PINPOS('N', 6) +#define PIN_PWR_WAKEUP1 PINPOS('N', 11) +#define 
PIN_PWR_WAKEUP0 PINPOS('N', 12) +#define PIN_PWR_GPIO1 PINPOS('N', 13) +#define PIN_EMMC_DAT3 PINPOS('P', 1) +#define PIN_EMMC_DAT0 PINPOS('P', 2) +#define PIN_EMMC_DAT2 PINPOS('P', 3) +#define PIN_EMMC_RSTN PINPOS('P', 4) +#define PIN_AUX0 PINPOS('P', 5) +#define PIN_IIC0_SDA PINPOS('P', 6) +#define PIN_PWR_SEQ3 PINPOS('P', 10) +#define PIN_PWR_VBAT_DET PINPOS('P', 11) +#define PIN_PWR_SEQ1 PINPOS('P', 12) +#define PIN_PWR_BUTTON1 PINPOS('P', 13) +#define PIN_EMMC_DAT1 PINPOS('R', 2) +#define PIN_EMMC_CMD PINPOS('R', 3) +#define PIN_EMMC_CLK PINPOS('R', 4) +#define PIN_IIC0_SCL PINPOS('R', 6) +#define PIN_GPIO_ZQ PINPOS('R', 10) +#define PIN_PWR_RSTN PINPOS('R', 11) +#define PIN_PWR_SEQ2 PINPOS('R', 12) +#define PIN_XTAL_XIN PINPOS('R', 13) + +#endif /* _DT_BINDINGS_PINCTRL_CV1812H_H */ diff --git a/include/dt-bindings/pinctrl/pinctrl-cv18xx.h b/include/dt-bindings/pinctrl/pinctrl-cv18xx.h new file mode 100644 index 000000000000..bc92ad1067ec --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-cv18xx.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2023 Sophgo Ltd. + * + * Author: Inochi Amaoto + */ + +#ifndef _DT_BINDINGS_PINCTRL_CV18XX_H +#define _DT_BINDINGS_PINCTRL_CV18XX_H + +#define PIN_MUX_INVALD 0xff + +#define PINMUX2(pin, mux, mux2) \ + (((pin) & 0xffff) | (((mux) & 0xff) << 16) | (((mux2) & 0xff) << 24)) + +#define PINMUX(pin, mux) \ + PINMUX2(pin, mux, PIN_MUX_INVALD) + +#endif /* _DT_BINDINGS_PINCTRL_CV18XX_H */ diff --git a/include/dt-bindings/pinctrl/pinctrl-sg2000.h b/include/dt-bindings/pinctrl/pinctrl-sg2000.h new file mode 100644 index 000000000000..4871f9a7c6c1 --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-sg2000.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + * + * This file is generated from vendor pinout definition. 
+ */ + +#ifndef _DT_BINDINGS_PINCTRL_SG2000_H +#define _DT_BINDINGS_PINCTRL_SG2000_H + +#include + +#define PINPOS(row, col) \ + ((((row) - 'A' + 1) << 8) + ((col) - 1)) + +#define PIN_MIPI_TXM4 PINPOS('A', 2) +#define PIN_MIPIRX0N PINPOS('A', 4) +#define PIN_MIPIRX3P PINPOS('A', 6) +#define PIN_MIPIRX4P PINPOS('A', 7) +#define PIN_VIVO_D2 PINPOS('A', 9) +#define PIN_VIVO_D3 PINPOS('A', 10) +#define PIN_VIVO_D10 PINPOS('A', 12) +#define PIN_USB_VBUS_DET PINPOS('A', 13) +#define PIN_MIPI_TXP3 PINPOS('B', 1) +#define PIN_MIPI_TXM3 PINPOS('B', 2) +#define PIN_MIPI_TXP4 PINPOS('B', 3) +#define PIN_MIPIRX0P PINPOS('B', 4) +#define PIN_MIPIRX1N PINPOS('B', 5) +#define PIN_MIPIRX2N PINPOS('B', 6) +#define PIN_MIPIRX4N PINPOS('B', 7) +#define PIN_MIPIRX5N PINPOS('B', 8) +#define PIN_VIVO_D1 PINPOS('B', 9) +#define PIN_VIVO_D5 PINPOS('B', 10) +#define PIN_VIVO_D7 PINPOS('B', 11) +#define PIN_VIVO_D9 PINPOS('B', 12) +#define PIN_USB_ID PINPOS('B', 13) +#define PIN_ETH_RXM PINPOS('B', 15) +#define PIN_MIPI_TXP2 PINPOS('C', 1) +#define PIN_MIPI_TXM2 PINPOS('C', 2) +#define PIN_CAM_PD0 PINPOS('C', 3) +#define PIN_CAM_MCLK0 PINPOS('C', 4) +#define PIN_MIPIRX1P PINPOS('C', 5) +#define PIN_MIPIRX2P PINPOS('C', 6) +#define PIN_MIPIRX3N PINPOS('C', 7) +#define PIN_MIPIRX5P PINPOS('C', 8) +#define PIN_VIVO_CLK PINPOS('C', 9) +#define PIN_VIVO_D6 PINPOS('C', 10) +#define PIN_VIVO_D8 PINPOS('C', 11) +#define PIN_USB_VBUS_EN PINPOS('C', 12) +#define PIN_ETH_RXP PINPOS('C', 14) +#define PIN_GPIO_RTX PINPOS('C', 15) +#define PIN_MIPI_TXP1 PINPOS('D', 1) +#define PIN_MIPI_TXM1 PINPOS('D', 2) +#define PIN_CAM_MCLK1 PINPOS('D', 3) +#define PIN_IIC3_SCL PINPOS('D', 4) +#define PIN_VIVO_D4 PINPOS('D', 10) +#define PIN_ETH_TXM PINPOS('D', 14) +#define PIN_ETH_TXP PINPOS('D', 15) +#define PIN_MIPI_TXP0 PINPOS('E', 1) +#define PIN_MIPI_TXM0 PINPOS('E', 2) +#define PIN_CAM_PD1 PINPOS('E', 4) +#define PIN_CAM_RST0 PINPOS('E', 5) +#define PIN_VIVO_D0 PINPOS('E', 10) +#define PIN_ADC1 PINPOS('E', 13) +#define PIN_ADC2 PINPOS('E', 14) +#define PIN_ADC3 PINPOS('E', 15) +#define PIN_AUD_AOUTL PINPOS('F', 2) +#define PIN_IIC3_SDA PINPOS('F', 4) +#define PIN_SD1_D2 PINPOS('F', 14) +#define PIN_AUD_AOUTR PINPOS('G', 2) +#define PIN_SD1_D3 PINPOS('G', 13) +#define PIN_SD1_CLK PINPOS('G', 14) +#define PIN_SD1_CMD PINPOS('G', 15) +#define PIN_AUD_AINL_MIC PINPOS('H', 1) +#define PIN_RSTN PINPOS('H', 12) +#define PIN_PWM0_BUCK PINPOS('H', 13) +#define PIN_SD1_D1 PINPOS('H', 14) +#define PIN_SD1_D0 PINPOS('H', 15) +#define PIN_AUD_AINR_MIC PINPOS('J', 1) +#define PIN_IIC2_SCL PINPOS('J', 13) +#define PIN_IIC2_SDA PINPOS('J', 14) +#define PIN_SD0_CD PINPOS('K', 2) +#define PIN_SD0_D1 PINPOS('K', 3) +#define PIN_UART2_RX PINPOS('K', 13) +#define PIN_UART2_CTS PINPOS('K', 14) +#define PIN_UART2_TX PINPOS('K', 15) +#define PIN_SD0_CLK PINPOS('L', 1) +#define PIN_SD0_D0 PINPOS('L', 2) +#define PIN_SD0_CMD PINPOS('L', 3) +#define PIN_CLK32K PINPOS('L', 14) +#define PIN_UART2_RTS PINPOS('L', 15) +#define PIN_SD0_D3 PINPOS('M', 1) +#define PIN_SD0_D2 PINPOS('M', 2) +#define PIN_UART0_RX PINPOS('M', 4) +#define PIN_UART0_TX PINPOS('M', 5) +#define PIN_JTAG_CPU_TRST PINPOS('M', 6) +#define PIN_PWR_ON PINPOS('M', 11) +#define PIN_PWR_GPIO2 PINPOS('M', 12) +#define PIN_PWR_GPIO0 PINPOS('M', 13) +#define PIN_CLK25M PINPOS('M', 14) +#define PIN_SD0_PWR_EN PINPOS('N', 1) +#define PIN_SPK_EN PINPOS('N', 3) +#define PIN_JTAG_CPU_TCK PINPOS('N', 4) +#define PIN_JTAG_CPU_TMS PINPOS('N', 6) +#define PIN_PWR_WAKEUP1 PINPOS('N', 11) +#define PIN_PWR_WAKEUP0 
PINPOS('N', 12) +#define PIN_PWR_GPIO1 PINPOS('N', 13) +#define PIN_EMMC_DAT3 PINPOS('P', 1) +#define PIN_EMMC_DAT0 PINPOS('P', 2) +#define PIN_EMMC_DAT2 PINPOS('P', 3) +#define PIN_EMMC_RSTN PINPOS('P', 4) +#define PIN_AUX0 PINPOS('P', 5) +#define PIN_IIC0_SDA PINPOS('P', 6) +#define PIN_PWR_SEQ3 PINPOS('P', 10) +#define PIN_PWR_VBAT_DET PINPOS('P', 11) +#define PIN_PWR_SEQ1 PINPOS('P', 12) +#define PIN_PWR_BUTTON1 PINPOS('P', 13) +#define PIN_EMMC_DAT1 PINPOS('R', 2) +#define PIN_EMMC_CMD PINPOS('R', 3) +#define PIN_EMMC_CLK PINPOS('R', 4) +#define PIN_IIC0_SCL PINPOS('R', 6) +#define PIN_GPIO_ZQ PINPOS('R', 10) +#define PIN_PWR_RSTN PINPOS('R', 11) +#define PIN_PWR_SEQ2 PINPOS('R', 12) +#define PIN_XTAL_XIN PINPOS('R', 13) + +#endif /* _DT_BINDINGS_PINCTRL_SG2000_H */ diff --git a/include/dt-bindings/pinctrl/pinctrl-sg2002.h b/include/dt-bindings/pinctrl/pinctrl-sg2002.h new file mode 100644 index 000000000000..3c36cfa0a550 --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-sg2002.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + * + * This file is generated from vendor pinout definition. + */ + +#ifndef _DT_BINDINGS_PINCTRL_SG2002_H +#define _DT_BINDINGS_PINCTRL_SG2002_H + +#include + +#define PIN_AUD_AINL_MIC 2 +#define PIN_AUD_AOUTR 4 +#define PIN_SD0_CLK 6 +#define PIN_SD0_CMD 7 +#define PIN_SD0_D0 8 +#define PIN_SD0_D1 10 +#define PIN_SD0_D2 11 +#define PIN_SD0_D3 12 +#define PIN_SD0_CD 14 +#define PIN_SD0_PWR_EN 15 +#define PIN_SPK_EN 17 +#define PIN_UART0_TX 18 +#define PIN_UART0_RX 19 +#define PIN_EMMC_DAT2 20 +#define PIN_EMMC_CLK 21 +#define PIN_EMMC_DAT0 22 +#define PIN_EMMC_DAT3 23 +#define PIN_EMMC_CMD 24 +#define PIN_EMMC_DAT1 25 +#define PIN_JTAG_CPU_TMS 26 +#define PIN_JTAG_CPU_TCK 27 +#define PIN_IIC0_SCL 28 +#define PIN_IIC0_SDA 29 +#define PIN_AUX0 30 +#define PIN_GPIO_ZQ 35 +#define PIN_PWR_VBAT_DET 38 +#define PIN_PWR_RSTN 39 +#define PIN_PWR_SEQ1 40 +#define PIN_PWR_SEQ2 41 +#define PIN_PWR_WAKEUP0 43 +#define PIN_PWR_BUTTON1 44 +#define PIN_XTAL_XIN 45 +#define PIN_PWR_GPIO0 47 +#define PIN_PWR_GPIO1 48 +#define PIN_PWR_GPIO2 49 +#define PIN_SD1_D3 51 +#define PIN_SD1_D2 52 +#define PIN_SD1_D1 53 +#define PIN_SD1_D0 54 +#define PIN_SD1_CMD 55 +#define PIN_SD1_CLK 56 +#define PIN_PWM0_BUCK 58 +#define PIN_ADC1 59 +#define PIN_USB_VBUS_DET 60 +#define PIN_ETH_TXP 62 +#define PIN_ETH_TXM 63 +#define PIN_ETH_RXP 64 +#define PIN_ETH_RXM 65 +#define PIN_GPIO_RTX 67 +#define PIN_MIPIRX4N 72 +#define PIN_MIPIRX4P 73 +#define PIN_MIPIRX3N 74 +#define PIN_MIPIRX3P 75 +#define PIN_MIPIRX2N 76 +#define PIN_MIPIRX2P 77 +#define PIN_MIPIRX1N 78 +#define PIN_MIPIRX1P 79 +#define PIN_MIPIRX0N 80 +#define PIN_MIPIRX0P 81 +#define PIN_MIPI_TXM2 83 +#define PIN_MIPI_TXP2 84 +#define PIN_MIPI_TXM1 85 +#define PIN_MIPI_TXP1 86 +#define PIN_MIPI_TXM0 87 +#define PIN_MIPI_TXP0 88 + +#endif /* _DT_BINDINGS_PINCTRL_SG2002_H */ -- cgit v1.2.3 From 5fd0628918977a0afdc2e6bc562d8751b5d3b8c5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 26 Aug 2024 12:45:22 +0200 Subject: netfilter: nf_tables: restore IP sanity checks for netdev/egress Subtract network offset to skb->len before performing IPv4 header sanity checks, then adjust transport offset from offset from mac header. Jorge Ortiz says: When small UDP packets (< 4 bytes payload) are sent from eth0, `meta l4proto udp` condition is not met because `NFT_PKTINFO_L4PROTO` is not set. 
This happens because there is a comparison that checks if the transport header offset exceeds the total length. This comparison does not take into account the fact that the skb network offset might be non-zero in egress mode (e.g., 14 bytes for Ethernet header). Fixes: 0ae8e4cca787 ("netfilter: nf_tables: set transport offset from mac header for netdev/egress") Reported-by: Jorge Ortiz Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv4.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 60a7d0ce3080..fcf967286e37 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -19,7 +19,7 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) { struct iphdr *iph, _iph; - u32 len, thoff; + u32 len, thoff, skb_len; iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), sizeof(*iph), &_iph); @@ -30,8 +30,10 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) return -1; len = iph_totlen(pkt->skb, iph); - thoff = skb_network_offset(pkt->skb) + (iph->ihl * 4); - if (pkt->skb->len < len) + thoff = iph->ihl * 4; + skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + + if (skb_len < len) return -1; else if (len < thoff) return -1; @@ -40,7 +42,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; - pkt->thoff = thoff; + pkt->thoff = skb_network_offset(pkt->skb) + thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; return 0; -- cgit v1.2.3 From 1e9046e3a154608f63ce79edcb01e6afd6b10c7c Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 14 Aug 2024 17:35:55 +0200 Subject: rpmb: add Replay Protected Memory Block (RPMB) subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A number of storage technologies support a specialised hardware partition designed to be resistant to replay attacks. The underlying HW protocols differ but the operations are common. The RPMB partition cannot be accessed via standard block layer, but by a set of specific RPMB commands. Such a partition provides authenticated and replay protected access, hence suitable as a secure storage. The initial aim of this patch is to provide a simple RPMB driver interface which can be accessed by the optee driver to facilitate early RPMB access to OP-TEE OS (secure OS) during the boot time. A TEE device driver can claim the RPMB interface, for example, via rpmb_interface_register() or rpmb_dev_find_device(). The RPMB driver provides a callback to route RPMB frames to the RPMB device accessible via rpmb_route_frames(). The detailed operation of implementing the access is left to the TEE device driver itself. 
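For illustration only, here is an editor's sketch (not part of this patch) of how a consumer could locate an RPMB device and route frames with the interface added below; the match policy, buffer handling and error codes are assumptions:

/*
 * Illustrative sketch only: uses the API declared in the new
 * <linux/rpmb.h> below; the match callback and buffer contents are
 * hypothetical and not taken from this series.
 */
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/rpmb.h>

static int example_match_any(struct device *dev, const void *data)
{
	return 1;	/* accept the first registered RPMB device */
}

static int example_rpmb_exchange(u8 *req, unsigned int req_len,
				 u8 *resp, unsigned int resp_len)
{
	struct rpmb_dev *rdev;
	int ret;

	/* Takes a reference on the returned device */
	rdev = rpmb_dev_find_device(NULL, NULL, example_match_any);
	if (!rdev)
		return -ENODEV;

	/* Route raw RPMB frames to the backing eMMC/UFS/NVMe device */
	ret = rpmb_route_frames(rdev, req, req_len, resp, resp_len);

	rpmb_dev_put(rdev);
	return ret;
}

A real TEE driver would match on device identity rather than accepting any device, and would typically keep the reference for the lifetime of its RPMB session.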
Signed-off-by: Tomas Winkler Signed-off-by: Alex Bennée Signed-off-by: Shyam Saini Signed-off-by: Jens Wiklander Reviewed-by: Linus Walleij Tested-by: Manuel Traut Reviewed-by: Ulf Hansson Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240814153558.708365-2-jens.wiklander@linaro.org Signed-off-by: Ulf Hansson --- include/linux/rpmb.h | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 include/linux/rpmb.h (limited to 'include') diff --git a/include/linux/rpmb.h b/include/linux/rpmb.h new file mode 100644 index 000000000000..cccda73eea4d --- /dev/null +++ b/include/linux/rpmb.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Intel Corp. All rights reserved + * Copyright (C) 2021-2022 Linaro Ltd + */ +#ifndef __RPMB_H__ +#define __RPMB_H__ + +#include +#include + +/** + * enum rpmb_type - type of underlying storage technology + * + * @RPMB_TYPE_EMMC : emmc (JESD84-B50.1) + * @RPMB_TYPE_UFS : UFS (JESD220) + * @RPMB_TYPE_NVME : NVM Express + */ +enum rpmb_type { + RPMB_TYPE_EMMC, + RPMB_TYPE_UFS, + RPMB_TYPE_NVME, +}; + +/** + * struct rpmb_descr - RPMB description provided by the underlying block device + * + * @type : block device type + * @route_frames : routes frames to and from the RPMB device + * @dev_id : unique device identifier read from the hardware + * @dev_id_len : length of unique device identifier + * @reliable_wr_count: number of sectors that can be written in one access + * @capacity : capacity of the device in units of 128K + * + * @dev_id is intended to be used as input when deriving the authenticaion key. + */ +struct rpmb_descr { + enum rpmb_type type; + int (*route_frames)(struct device *dev, u8 *req, unsigned int req_len, + u8 *resp, unsigned int resp_len); + u8 *dev_id; + size_t dev_id_len; + u16 reliable_wr_count; + u16 capacity; +}; + +/** + * struct rpmb_dev - device which can support RPMB partition + * + * @dev : device + * @id : device_id + * @list_node : linked list node + * @descr : RPMB description + */ +struct rpmb_dev { + struct device dev; + int id; + struct list_head list_node; + struct rpmb_descr descr; +}; + +#define to_rpmb_dev(x) container_of((x), struct rpmb_dev, dev) + +#if IS_ENABLED(CONFIG_RPMB) +struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev); +void rpmb_dev_put(struct rpmb_dev *rdev); +struct rpmb_dev *rpmb_dev_find_device(const void *data, + const struct rpmb_dev *start, + int (*match)(struct device *dev, + const void *data)); +int rpmb_interface_register(struct class_interface *intf); +void rpmb_interface_unregister(struct class_interface *intf); +struct rpmb_dev *rpmb_dev_register(struct device *dev, + struct rpmb_descr *descr); +int rpmb_dev_unregister(struct rpmb_dev *rdev); + +int rpmb_route_frames(struct rpmb_dev *rdev, u8 *req, + unsigned int req_len, u8 *resp, unsigned int resp_len); + +#else +static inline struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev) +{ + return NULL; +} + +static inline void rpmb_dev_put(struct rpmb_dev *rdev) { } + +static inline struct rpmb_dev * +rpmb_dev_find_device(const void *data, const struct rpmb_dev *start, + int (*match)(struct device *dev, const void *data)) +{ + return NULL; +} + +static inline int rpmb_interface_register(struct class_interface *intf) +{ + return -EOPNOTSUPP; +} + +static inline void rpmb_interface_unregister(struct class_interface *intf) +{ +} + +static inline struct rpmb_dev * +rpmb_dev_register(struct device *dev, struct rpmb_descr *descr) +{ + return NULL; 
+} + +static inline int rpmb_dev_unregister(struct rpmb_dev *dev) +{ + return 0; +} + +static inline int rpmb_route_frames(struct rpmb_dev *rdev, u8 *req, + unsigned int req_len, u8 *resp, + unsigned int resp_len) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_RPMB */ + +#endif /* __RPMB_H__ */ -- cgit v1.2.3 From c30b855e814d9094e369a19fbd86c9bb5badc154 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 14 Aug 2024 17:35:57 +0200 Subject: tee: add tee_device_set_dev_groups() Add tee_device_set_dev_groups() to TEE drivers to supply driver specific attribute groups. The class specific attributes are from now on added via the tee_class, which currently only consist of implementation_id. Signed-off-by: Jens Wiklander Reviewed-by: Sumit Garg Link: https://lore.kernel.org/r/20240814153558.708365-4-jens.wiklander@linaro.org Signed-off-by: Ulf Hansson --- include/linux/tee_core.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index efd16ed52315..a38494d6b5f4 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -154,6 +154,18 @@ int tee_device_register(struct tee_device *teedev); */ void tee_device_unregister(struct tee_device *teedev); +/** + * tee_device_set_dev_groups() - Set device attribute groups + * @teedev: Device to register + * @dev_groups: Attribute groups + * + * Assigns the provided @dev_groups to the @teedev to be registered later + * with tee_device_register(). Calling this function is optional, but if + * it's called it must be called before tee_device_register(). + */ +void tee_device_set_dev_groups(struct tee_device *teedev, + const struct attribute_group **dev_groups); + /** * tee_session_calc_client_uuid() - Calculates client UUID for session * @uuid: Resulting UUID -- cgit v1.2.3 From d3596c73011d1b0e9b43ae12f1e0f491d6bb96eb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 21 Aug 2024 15:24:06 +0200 Subject: mmc: core: Remove struct mmc_context_info The 'mmc_context_info' structure is unused. It has been introduced in: - commit 2220eedfd7ae ("mmc: fix async request mechanism for sequential read scenarios") in 2013-02 and its usages have been removed in: - commit 126b62700386 ("mmc: core: Remove code no longer needed after the switch to blk-mq") - commit 0fbfd1251830 ("mmc: block: Remove code no longer needed after the switch to blk-mq") in 2017-12. Now remove this unused structure. 
Signed-off-by: Christophe JAILLET Reviewed-by: Vladimir Zapolskiy Link: https://lore.kernel.org/r/232106a8a6a374dee25feea9b94498361568c10b.1724246389.git.christophe.jaillet@wanadoo.fr Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 88c6a76042ee..f85df7d045ca 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -291,20 +291,6 @@ struct mmc_slot { void *handler_priv; }; -/** - * mmc_context_info - synchronization details for mmc context - * @is_done_rcv wake up reason was done request - * @is_new_req wake up reason was new request - * @is_waiting_last_req mmc context waiting for single running request - * @wait wait queue - */ -struct mmc_context_info { - bool is_done_rcv; - bool is_new_req; - bool is_waiting_last_req; - wait_queue_head_t wait; -}; - struct regulator; struct mmc_pwrseq; -- cgit v1.2.3 From f5e1638bf3c785600e2ab53e5532007af5296581 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Sat, 24 Aug 2024 01:59:17 +0300 Subject: mmc: core: remove left-over data structure declarations The last users of 'enum mmc_blk_status' and 'struct mmc_async_req' were removed by commit 126b62700386 ("mmc: core: Remove code no longer needed after the switch to blk-mq") in 2017, remove these two left-over data structures. Signed-off-by: Vladimir Zapolskiy Link: https://lore.kernel.org/r/20240823225917.2826156-1-vladimir.zapolskiy@linaro.org Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 12 ------------ include/linux/mmc/host.h | 10 ---------- 2 files changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 2c7928a50907..f0ac2e469b32 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -11,18 +11,6 @@ struct mmc_data; struct mmc_request; -enum mmc_blk_status { - MMC_BLK_SUCCESS = 0, - MMC_BLK_PARTIAL, - MMC_BLK_CMD_ERR, - MMC_BLK_RETRY, - MMC_BLK_ABORT, - MMC_BLK_DATA_ERR, - MMC_BLK_ECC_ERR, - MMC_BLK_NOMEDIUM, - MMC_BLK_NEW_REQUEST, -}; - struct mmc_command { u32 opcode; u32 arg; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f85df7d045ca..ac01cd1622ef 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -264,16 +264,6 @@ struct mmc_cqe_ops { void (*cqe_recovery_finish)(struct mmc_host *host); }; -struct mmc_async_req { - /* active mmc request */ - struct mmc_request *mrq; - /* - * Check error status of completed mmc request. - * Returns 0 if success otherwise non zero. - */ - enum mmc_blk_status (*err_check)(struct mmc_card *, struct mmc_async_req *); -}; - /** * struct mmc_slot - MMC slot functions * -- cgit v1.2.3 From f2c6dbd220170c2396fb019ead67fbada1e23ebd Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 16 Aug 2024 12:51:22 +0800 Subject: kunit: Device wrappers should also manage driver name kunit_driver_create() accepts a name for the driver, but does not copy it, so if that name is either on the stack, or otherwise freed, we end up with a use-after-free when the driver is cleaned up. Instead, strdup() the name, and manage it as another KUnit allocation. As there was no existing kunit_kstrdup(), we add one. Further, add a kunit_ variant of strdup_const() and kfree_const(), so we don't need to allocate and manage the string in the majority of cases where it's a constant. However, these are inline functions, and is_kernel_rodata() only works for built-in code. 
This causes problems in two cases: - If kunit is built as a module, __{start,end}_rodata is not defined. - If a kunit test using these functions is built as a module, it will suffer the same fate. This fixes a KASAN splat with overflow.overflow_allocation_test, when built as a module. Restrict the is_kernel_rodata() case to when KUnit is built as a module, which fixes the first case, at the cost of losing the optimisation. Also, make kunit_{kstrdup,kfree}_const non-inline, so that other modules using them will not accidentally depend on is_kernel_rodata(). If KUnit is built-in, they'll benefit from the optimisation, if KUnit is not, they won't, but the string will be properly duplicated. Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Reported-by: Nico Pache Closes: https://groups.google.com/g/kunit-dev/c/81V9b9QYON0 Reviewed-by: Kees Cook Reviewed-by: Maxime Ripard Reviewed-by: Rae Moar Signed-off-by: David Gow Tested-by: Rae Moar Signed-off-by: Shuah Khan --- include/kunit/test.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index e2a1f0928e8b..5ac237c949a0 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -28,6 +28,7 @@ #include #include +#include /* Static key: true if any KUnit tests are currently running */ DECLARE_STATIC_KEY_FALSE(kunit_running); @@ -480,6 +481,53 @@ static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO); } + +/** + * kunit_kfree_const() - conditionally free test managed memory + * @x: pointer to the memory + * + * Calls kunit_kfree() only if @x is not in .rodata section. + * See kunit_kstrdup_const() for more information. + */ +void kunit_kfree_const(struct kunit *test, const void *x); + +/** + * kunit_kstrdup() - Duplicates a string into a test managed allocation. + * + * @test: The test context object. + * @str: The NULL-terminated string to duplicate. + * @gfp: flags passed to underlying kmalloc(). + * + * See kstrdup() and kunit_kmalloc_array() for more information. + */ +static inline char *kunit_kstrdup(struct kunit *test, const char *str, gfp_t gfp) +{ + size_t len; + char *buf; + + if (!str) + return NULL; + + len = strlen(str) + 1; + buf = kunit_kmalloc(test, len, gfp); + if (buf) + memcpy(buf, str, len); + return buf; +} + +/** + * kunit_kstrdup_const() - Conditionally duplicates a string into a test managed allocation. + * + * @test: The test context object. + * @str: The NULL-terminated string to duplicate. + * @gfp: flags passed to underlying kmalloc(). + * + * Calls kunit_kstrdup() only if @str is not in the rodata section. Must be freed with + * kunit_kfree_const() -- not kunit_kfree(). + * See kstrdup_const() and kunit_kmalloc_array() for more information. + */ +const char *kunit_kstrdup_const(struct kunit *test, const char *str, gfp_t gfp); + /** * kunit_vm_mmap() - Allocate KUnit-tracked vm_mmap() area * @test: The test context object. -- cgit v1.2.3 From 8d3cefaf659265aa82b0373a563fdb9d16a2b947 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 15 Aug 2024 21:44:50 +0200 Subject: i2c: core: Lock address during client device instantiation Krzysztof reported an issue [0] which is caused by parallel attempts to instantiate the same I2C client device. This can happen if driver supports auto-detection, but certain devices are also instantiated explicitly. 
The original change isn't actually wrong, it just revealed that I2C core isn't prepared yet to handle this scenario. Calls to i2c_new_client_device() can be nested, therefore we can't use a simple mutex here. Parallel instantiation of devices at different addresses is ok, so we just have to prevent parallel instantiation at the same address. We can use a bitmap with one bit per 7-bit I2C client address, and atomic bit operations to set/check/clear bits. Now a parallel attempt to instantiate a device at the same address will result in -EBUSY being returned, avoiding the "sysfs: cannot create duplicate filename" splash. Note: This patch version includes small cosmetic changes to the Tested-by version, only functional change is that address locking is supported for slave addresses too. [0] https://lore.kernel.org/linux-i2c/9479fe4e-eb0c-407e-84c0-bd60c15baf74@ans.pl/T/#m12706546e8e2414d8f1a0dc61c53393f731685cc Fixes: caba40ec3531 ("eeprom: at24: Probe for DDR3 thermal sensor in the SPD case") Cc: stable@vger.kernel.org Tested-by: Krzysztof Piotr Oledzki Signed-off-by: Heiner Kallweit Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7eedd0c662da..8d79440cd8ce 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -761,6 +761,9 @@ struct i2c_adapter { struct regulator *bus_regulator; struct dentry *debugfs; + + /* 7bit address space */ + DECLARE_BITMAP(addrs_in_instantiation, 1 << 7); }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) -- cgit v1.2.3 From af8a6e65f42c6c89414017cc4d78403e83056172 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 3 May 2024 12:10:52 +0200 Subject: USB: serial: set driver owner when registering drivers Modules registering drivers with usb_serial_register_drivers() might forget to set .owner field. The field is used by some of other kernel parts for reference counting (try_module_get()), so it is expected that drivers will set it. Solve the problem by moving this task away from the drivers to the core USB serial code, just like we did for platform_driver in commit 9447057eaff8 ("platform_device: use a macro instead of platform_driver_register"). 
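As a rough sketch from the editor (not part of the patch), a minimal driver now registers like this and no longer has to set .driver.owner itself; the device ID and names are placeholders:

/*
 * Hypothetical minimal usb-serial driver; VID/PID and names are made up.
 * The usb_serial_register_drivers() macro now supplies THIS_MODULE, so
 * .driver.owner does not need to be set here.
 */
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>

static const struct usb_device_id example_id_table[] = {
	{ USB_DEVICE(0x1234, 0x5678) },	/* placeholder VID/PID */
	{ }
};
MODULE_DEVICE_TABLE(usb, example_id_table);

static struct usb_serial_driver example_device = {
	.driver		= { .name = "example_serial" },
	.id_table	= example_id_table,
	.num_ports	= 1,
};

static struct usb_serial_driver * const example_drivers[] = {
	&example_device, NULL
};

static int __init example_init(void)
{
	/* Expands to __usb_serial_register_drivers(..., THIS_MODULE, ...) */
	return usb_serial_register_drivers(example_drivers, "example_serial",
					   example_id_table);
}
module_init(example_init);

static void __exit example_exit(void)
{
	usb_serial_deregister_drivers(example_drivers);
}
module_exit(example_exit);

MODULE_LICENSE("GPL");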
Signed-off-by: Krzysztof Kozlowski [ johan: amend commit summary ] Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 1a0a4dc87980..75b2b763f1ba 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -311,8 +311,11 @@ struct usb_serial_driver { #define to_usb_serial_driver(d) \ container_of(d, struct usb_serial_driver, driver) -int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], - const char *name, const struct usb_device_id *id_table); +#define usb_serial_register_drivers(serial_drivers, name, id_table) \ + __usb_serial_register_drivers(serial_drivers, THIS_MODULE, name, id_table) +int __usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], + struct module *owner, const char *name, + const struct usb_device_id *id_table); void usb_serial_deregister_drivers(struct usb_serial_driver *const serial_drivers[]); void usb_serial_port_softint(struct usb_serial_port *port); -- cgit v1.2.3 From 5fd3e2412ade67ea20d855f0aea821c650d27559 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 19 Jun 2024 03:02:42 +0300 Subject: media: v4l2-subdev: Support hybrid links in v4l2_subdev_link_validate() The v4l2_subdev_link_validate() helper function is meant to be used as a drop-in implementation of a V4L2 subdev entity .link_validate() handler. It supports subdev-to-subdev links only, and complains if one end of the link is not a subdev. This forces drivers that have video output devices connected to subdevs to implement a custom .link_validate() handler, calling v4l2_subdev_link_validate() for the subdev-to-subdev links, and performing manual link validation for the video-to-subdev links. Video devices embed a media entity, and therefore also have a .link_validate() operation. For video capture devices, the operation should be manually implemented by drivers for validate the subdev-to-video links. For video output devices, on the other hand, that operation is never called, as link validation is performed in the context of the sink entity. As a result, we end up forcing drivers to implement a custom .link_validate() handler for subdevs connected to video output devices, when the video devices provide an operation that could be used for that purpose. To improve that situation, make v4l2_subdev_link_validate() delegate link validation to the source's .link_validate() operation when the link source is a video device and the link sink is a subdev. This allows broader usage of v4l2_subdev_link_validate(), and simplifies drivers by making video device link validation easy to implement in the video device .link_validate(), regardless of whether the video device is an output device or a capture device. Signed-off-by: Laurent Pinchart Reviewed-by: Tomi Valkeinen Acked-by: Sakari Ailus --- include/media/v4l2-subdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index bd235d325ff9..8daa0929865c 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1250,6 +1250,12 @@ int v4l2_subdev_link_validate_default(struct v4l2_subdev *sd, * calls v4l2_subdev_link_validate_default() to ensure that * width, height and the media bus pixel code are equal on both * source and sink of the link. 
+ * + * The function can be used as a drop-in &media_entity_ops.link_validate + * implementation for v4l2_subdev instances. It supports all links between + * subdevs, as well as links between subdevs and video devices, provided that + * the video devices also implement their &media_entity_ops.link_validate + * operation. */ int v4l2_subdev_link_validate(struct media_link *link); -- cgit v1.2.3 From 67733d7a71503fd3e32eeada371f8aa2516c5c95 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 1 Aug 2024 20:10:51 -0700 Subject: drm/i915: ARL requires a newer GSC firmware ARL and MTL share a single GSC firmware blob. However, ARL requires a newer version of it. So add differentiate of the PCI ids for ARL from MTL and create ARL as a sub-platform of MTL. That way, all the existing workarounds and such still treat ARL as MTL exactly as before. However, now the GSC code can check for ARL and do an extra version check on the firmware before committing to it. Also, the version extraction code has various ways of failing but the return code was being ignore and so the firmware load would attempt to continue anyway. Fix that by propagating the return code to the next level out. Signed-off-by: John Harrison Fixes: 213c43676beb ("drm/i915/mtl: Remove the 'force_probe' requirement for Meteor Lake") Reviewed-by: Daniele Ceraolo Spurio Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240802031051.3816392-1-John.C.Harrison@Intel.com Signed-off-by: Rodrigo Vivi --- include/drm/intel/i915_pciids.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index b21374f76df2..2bf03ebfcf73 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -772,15 +772,18 @@ INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) /* MTL */ +#define INTEL_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + #define INTEL_MTL_IDS(MACRO__, ...) \ + INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) /* LNL */ -- cgit v1.2.3 From cf4d37b8157ca085c17fdc1faad737465ff311b9 Mon Sep 17 00:00:00 2001 From: renjun wang Date: Sat, 24 Aug 2024 16:20:14 +0800 Subject: drm/atomic: fix kerneldoc for fake_commit field According to the context, the function description for fake_commit should be "prevent the atomic states from being freed too early" Signed-off-by: renjun wang Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/tencent_6EF2603DCCFAD6A8265F8AAD9D6D5BCB9309@qq.com --- include/drm/drm_atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 4d7f4c5f2001..31ca88deb10d 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -460,7 +460,7 @@ struct drm_atomic_state { * * Used for signaling unbound planes/connectors. * When a connector or plane is not bound to any CRTC, it's still important - * to preserve linearity to prevent the atomic states from being freed to early. 
+ * to preserve linearity to prevent the atomic states from being freed too early. * * This commit (if set) is not bound to any CRTC, but will be completed when * drm_atomic_helper_commit_hw_done() is called. -- cgit v1.2.3 From 1d4684fbe88dc28e2bf79f5e94a432f0469d2dac Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 2 Aug 2024 17:32:02 -0700 Subject: iommufd: Reorder include files Reorder include files to alphabetic order to simplify maintenance, and separate local headers and global headers with a blank line. No functional change intended. Link: https://patch.msgid.link/r/7524b037cc05afe19db3c18f863253e1d1554fa2.1722644866.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 4 ++-- include/uapi/linux/iommufd.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ffc3a949f837..c2f2f6b9148e 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -6,9 +6,9 @@ #ifndef __LINUX_IOMMUFD_H #define __LINUX_IOMMUFD_H -#include -#include #include +#include +#include struct device; struct iommufd_device; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 4dde745cfb7e..72010f71c5e4 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -4,8 +4,8 @@ #ifndef _UAPI_IOMMUFD_H #define _UAPI_IOMMUFD_H -#include #include +#include #define IOMMUFD_TYPE (';') -- cgit v1.2.3 From 45c4d994b82b08f0ce5eb50f8da29379c92a391e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Fri, 23 Aug 2024 18:30:47 +0200 Subject: accel: Use XArray instead of IDR for minors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accel minor management is based on DRM (and is also using struct drm_minor internally), since DRM is using XArray for minors, it makes sense to also convert accel. As the two implementations are identical (only difference being the underlying xarray), move the accel_minor_* functionality to DRM. 
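For illustration (editor's sketch, not part of this patch), an open() path can now resolve an accel minor through the shared XArray helpers exported below; the function name and the per-file setup are assumptions:

/*
 * Illustrative only: resolves a minor via the XArray-based helpers that
 * this patch exposes through <drm/drm_accel.h> and <drm/drm_file.h>.
 * The surrounding open() logic is hypothetical.
 */
#include <linux/err.h>
#include <linux/fs.h>
#include <drm/drm_accel.h>
#include <drm/drm_file.h>

static int example_accel_open(struct inode *inode, struct file *filp)
{
	struct drm_minor *minor;
	int ret = 0;

	/* Looks up the minor in accel_minors_xa and takes a reference */
	minor = drm_minor_acquire(&accel_minors_xa, iminor(inode));
	if (IS_ERR(minor))
		return PTR_ERR(minor);

	/* ... per-file setup against minor->dev would go here ... */

	drm_minor_release(minor);
	return ret;
}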
Signed-off-by: Michał Winiarski Acked-by: James Zhu Acked-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240823163048.2676257-3-michal.winiarski@intel.com Signed-off-by: Christian König --- include/drm/drm_accel.h | 18 ++---------------- include/drm/drm_file.h | 5 +++++ 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/drm/drm_accel.h b/include/drm/drm_accel.h index f4d3784b1dce..8867ce0be94c 100644 --- a/include/drm/drm_accel.h +++ b/include/drm/drm_accel.h @@ -51,11 +51,10 @@ #if IS_ENABLED(CONFIG_DRM_ACCEL) +extern struct xarray accel_minors_xa; + void accel_core_exit(void); int accel_core_init(void); -void accel_minor_remove(int index); -int accel_minor_alloc(void); -void accel_minor_replace(struct drm_minor *minor, int index); void accel_set_device_instance_params(struct device *kdev, int index); int accel_open(struct inode *inode, struct file *filp); void accel_debugfs_init(struct drm_device *dev); @@ -73,19 +72,6 @@ static inline int __init accel_core_init(void) return 0; } -static inline void accel_minor_remove(int index) -{ -} - -static inline int accel_minor_alloc(void) -{ - return -EOPNOTSUPP; -} - -static inline void accel_minor_replace(struct drm_minor *minor, int index) -{ -} - static inline void accel_set_device_instance_params(struct device *kdev, int index) { } diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index ab230d3af138..8c0030c77308 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -45,6 +45,8 @@ struct drm_printer; struct device; struct file; +extern struct xarray drm_minors_xa; + /* * FIXME: Not sure we want to have drm_minor here in the end, but to avoid * header include loops we need it here for now. @@ -434,6 +436,9 @@ static inline bool drm_is_accel_client(const struct drm_file *file_priv) void drm_file_update_pid(struct drm_file *); +struct drm_minor *drm_minor_acquire(struct xarray *minors_xa, unsigned int minor_id); +void drm_minor_release(struct drm_minor *minor); + int drm_open(struct inode *inode, struct file *filp); int drm_open_helper(struct file *filp, struct drm_minor *minor); ssize_t drm_read(struct file *filp, char __user *buffer, -- cgit v1.2.3 From 03c3d7c74371a46d967fbf41628874ec04ddda96 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 15 Aug 2024 22:11:31 +0200 Subject: nvme-rdma: send cntlid in the RDMA_CM_REQUEST Private Data When sending a RDMA_CM_REQUEST, the NVMe RDMA Transport Specification allows you to populate the cntlid field in the RDMA_CM_REQUEST Private Data. The cntlid is returned by the target on completion of the first RDMA_CM_REQUEST command (which creates the admin queue). The cntlid field can then be populated by the host when the I/O queues are created (using additional RDMA_CM_REQUEST commands), such that the target can perform extra validation for additional RDMA_CM_REQUEST commands. This additional error code and error message is also added, such that nvme_rdma_cm_msg() will display the proper error message if the target fails the RDMA_CM_REQUEST command because of this extra validation. 
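As a hedged sketch from the editor (not part of the patch), the host side could populate the RDMA_CM_REQUEST private data for an I/O queue roughly as follows; the helper name and the queue-size handling are placeholders:

/*
 * Illustrative only: fills struct nvme_rdma_cm_req with the cntlid that
 * the target returned when the admin queue (qid 0) was created. The
 * helper name and sizing are assumptions, not this patch's code.
 */
#include <asm/byteorder.h>
#include <linux/nvme-rdma.h>

static void example_fill_cm_req(struct nvme_rdma_cm_req *priv, u16 qid,
				u16 queue_size, u16 cntlid)
{
	priv->recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
	priv->qid = cpu_to_le16(qid);
	priv->hrqsize = cpu_to_le16(queue_size);
	priv->hsqsize = cpu_to_le16(queue_size - 1);
	/* New field: only meaningful once qid 0 has returned a cntlid */
	if (qid)
		priv->cntlid = cpu_to_le16(cntlid);
}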
Signed-off-by: Niklas Cassel Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme-rdma.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index eb2f04d636c8..97c5f00b9aa3 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -25,6 +25,7 @@ enum nvme_rdma_cm_status { NVME_RDMA_CM_NO_RSC = 0x06, NVME_RDMA_CM_INVALID_IRD = 0x07, NVME_RDMA_CM_INVALID_ORD = 0x08, + NVME_RDMA_CM_INVALID_CNTLID = 0x09, }; static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) @@ -46,6 +47,8 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) return "invalid IRD"; case NVME_RDMA_CM_INVALID_ORD: return "Invalid ORD"; + case NVME_RDMA_CM_INVALID_CNTLID: + return "invalid controller ID"; default: return "unrecognized reason"; } @@ -64,7 +67,8 @@ struct nvme_rdma_cm_req { __le16 qid; __le16 hrqsize; __le16 hsqsize; - u8 rsvd[24]; + __le16 cntlid; + u8 rsvd[22]; }; /** -- cgit v1.2.3 From abcd3026dd63417692a5e80aff70e7cd9b5c14ea Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 22 Aug 2024 14:07:01 +0200 Subject: ethtool: Extend cable testing interface with result source information Extend the ethtool netlink cable testing interface by adding support for specifying the source of cable testing results. This allows users to differentiate between results obtained through different diagnostic methods. For example, some TI 10BaseT1L PHYs provide two variants of cable diagnostics: Time Domain Reflectometry (TDR) and Active Link Cable Diagnostic (ALCD). By introducing `ETHTOOL_A_CABLE_RESULT_SRC` and `ETHTOOL_A_CABLE_FAULT_LENGTH_SRC` attributes, this update enables drivers to indicate whether the result was derived from TDR or ALCD, improving the clarity and utility of diagnostic information. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240822120703.1393130-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 45d8bcdea056..283305f6b063 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -573,10 +573,20 @@ enum { ETHTOOL_A_CABLE_PAIR_D, }; +/* Information source for specific results. 
*/ +enum { + ETHTOOL_A_CABLE_INF_SRC_UNSPEC, + /* Results provided by the Time Domain Reflectometry (TDR) */ + ETHTOOL_A_CABLE_INF_SRC_TDR, + /* Results provided by the Active Link Cable Diagnostic (ALCD) */ + ETHTOOL_A_CABLE_INF_SRC_ALCD, +}; + enum { ETHTOOL_A_CABLE_RESULT_UNSPEC, ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ + ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ __ETHTOOL_A_CABLE_RESULT_CNT, ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) @@ -586,6 +596,7 @@ enum { ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ + ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) -- cgit v1.2.3 From 4715d87e11ac805ab95a75adfbf93d67dfbdbe81 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 22 Aug 2024 14:07:02 +0200 Subject: ethtool: Add support for specifying information source in cable test results Enhance the ethtool cable test interface by introducing the ability to specify the source of the diagnostic information for cable test results. This is particularly useful for PHYs that offer multiple diagnostic methods, such as Time Domain Reflectometry (TDR) and Active Link Cable Diagnostic (ALCD). Key changes: - Added `ethnl_cable_test_result_with_src` and `ethnl_cable_test_fault_length_with_src` functions to allow specifying the information source when reporting cable test results. - Updated existing `ethnl_cable_test_result` and `ethnl_cable_test_fault_length` functions to use TDR as the default source, ensuring backward compatibility. - Modified the UAPI to support these new attributes, enabling drivers to provide more detailed diagnostic information. 
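For illustration only (editor's sketch, not from the patch), a PHY driver whose diagnostics come from ALCD could report a result and fault length like this; the pair, result code and distance are placeholders:

/*
 * Illustrative only: reports an ALCD-derived cable test result using the
 * new *_with_src() helpers; the concrete values are made up.
 */
#include <linux/ethtool_netlink.h>
#include <linux/phy.h>

static int example_report_alcd(struct phy_device *phydev)
{
	int ret;

	ret = ethnl_cable_test_result_with_src(phydev, ETHTOOL_A_CABLE_PAIR_A,
					       ETHTOOL_A_CABLE_RESULT_CODE_OPEN,
					       ETHTOOL_A_CABLE_INF_SRC_ALCD);
	if (ret)
		return ret;

	/* Distance to the fault in centimeters, as measured by ALCD */
	return ethnl_cable_test_fault_length_with_src(phydev,
						      ETHTOOL_A_CABLE_PAIR_A,
						      1500,
						      ETHTOOL_A_CABLE_INF_SRC_ALCD);
}

Drivers whose results come from TDR can keep calling the unchanged ethnl_cable_test_result() and ethnl_cable_test_fault_length() wrappers, which default the source to ETHTOOL_A_CABLE_INF_SRC_TDR.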
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240822120703.1393130-3-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- include/linux/ethtool_netlink.h | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index fae0dfb9a9c8..aba91335273a 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -23,8 +23,10 @@ struct phy_device; int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd); void ethnl_cable_test_free(struct phy_device *phydev); void ethnl_cable_test_finished(struct phy_device *phydev); -int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result); -int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm); +int ethnl_cable_test_result_with_src(struct phy_device *phydev, u8 pair, + u8 result, u32 src); +int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, u8 pair, + u32 cm, u32 src); int ethnl_cable_test_amplitude(struct phy_device *phydev, u8 pair, s16 mV); int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV); int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last, @@ -54,14 +56,14 @@ static inline void ethnl_cable_test_free(struct phy_device *phydev) static inline void ethnl_cable_test_finished(struct phy_device *phydev) { } -static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, - u8 result) +static inline int ethnl_cable_test_result_with_src(struct phy_device *phydev, + u8 pair, u8 result, u32 src) { return -EOPNOTSUPP; } -static inline int ethnl_cable_test_fault_length(struct phy_device *phydev, - u8 pair, u32 cm) +static inline int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, + u8 pair, u32 cm, u32 src) { return -EOPNOTSUPP; } @@ -119,4 +121,19 @@ static inline bool ethtool_dev_mm_supported(struct net_device *dev) } #endif /* IS_ENABLED(CONFIG_ETHTOOL_NETLINK) */ + +static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, + u8 result) +{ + return ethnl_cable_test_result_with_src(phydev, pair, result, + ETHTOOL_A_CABLE_INF_SRC_TDR); +} + +static inline int ethnl_cable_test_fault_length(struct phy_device *phydev, + u8 pair, u32 cm) +{ + return ethnl_cable_test_fault_length_with_src(phydev, pair, cm, + ETHTOOL_A_CABLE_INF_SRC_TDR); +} + #endif /* _LINUX_ETHTOOL_NETLINK_H_ */ -- cgit v1.2.3 From d24dac8eb8111c401b0de40a17760a0a254bcffc Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:22 +0100 Subject: packet: Correct spelling in if_packet.h Correct spelling in if_packet.h As reported by codespell. Signed-off-by: Simon Horman Acked-by: Willem de Bruijn Link: https://patch.msgid.link/20240822-net-spell-v1-1-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_packet.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 9efc42382fdb..1d2718dd9647 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -230,8 +230,8 @@ struct tpacket_hdr_v1 { * ts_first_pkt: * Is always the time-stamp when the block was opened. * Case a) ZERO packets - * No packets to deal with but atleast you know the - * time-interval of this block. + * No packets to deal with but at least you know + * the time-interval of this block. 
* Case b) Non-zero packets * Use the ts of the first packet in the block. * @@ -265,7 +265,8 @@ enum tpacket_versions { - struct tpacket_hdr - pad to TPACKET_ALIGNMENT=16 - struct sockaddr_ll - - Gap, chosen so that packet data (Start+tp_net) alignes to TPACKET_ALIGNMENT=16 + - Gap, chosen so that packet data (Start+tp_net) aligns to + TPACKET_ALIGNMENT=16 - Start+tp_mac: [ Optional MAC header ] - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16. - Pad to align to TPACKET_ALIGNMENT=16 -- cgit v1.2.3 From c3494460324832ba03ab8f640a0a5e52283b1b15 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:23 +0100 Subject: s390/iucv: Correct spelling in iucv.h Correct spelling in iucv.h As reported by codespell. Cc: Alexandra Winter Cc: Thorsten Winkler Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-2-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/iucv/iucv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index 4d114e6d6d23..dd9e93c12260 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -15,7 +15,7 @@ * To explore any of the IUCV functions, one must first register their * program using iucv_register(). Once your program has successfully * completed a register, it can exploit the other functions. - * For furthur reference on all IUCV functionality, refer to the + * For further reference on all IUCV functionality, refer to the * CP Programming Services book, also available on the web thru * www.vm.ibm.com/pubs, manual # SC24-6084 * -- cgit v1.2.3 From d0193b167f2756de2f8be4d44186294bfacaaafc Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:24 +0100 Subject: ip_tunnel: Correct spelling in ip_tunnels.h Correct spelling in ip_tunnels.h As reported by codespell. Cc: David Ahern Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-3-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1db2417b8ff5..6194fbb564c6 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -573,7 +573,7 @@ static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, return 0; } -/* Propogate ECN bits out */ +/* Propagate ECN bits out */ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, const struct sk_buff *skb) { -- cgit v1.2.3 From 507285b7f9b2bc90f093fa335d43f2a21a2dbef9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:25 +0100 Subject: ipv6: Correct spelling in ipv6.h Correct spelling in ip_tunnels.h As reported by codespell. 
Cc: David Ahern Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-4-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e7113855a10f..248bfb26e2af 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -851,7 +851,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int * we should *never* get to this point since that * would mean the addrs are equal * - * However, we do get to it 8) And exacly, when + * However, we do get to it 8) And exactly, when * addresses are equal 8) * * ip route add 1111::/128 via ... @@ -973,7 +973,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, hash = skb_get_hash_flowi6(skb, fl6); /* Since this is being sent on the wire obfuscate hash a bit - * to minimize possbility that any useful information to an + * to minimize possibility that any useful information to an * attacker is leaked. Only lower 20 bits are relevant. */ hash = rol32(hash, 16); -- cgit v1.2.3 From e8ac2dba93eafb928fd9e127cdade3b2216a642c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:26 +0100 Subject: bonding: Correct spelling in headers Correct spelling in bond_3ad.h and bond_alb.h. As reported by codespell. Cc: Jay Vosburgh Cc: Andy Gospodarek Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-5-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/bond_3ad.h | 5 ++++- include/net/bond_alb.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 9ce5ac2bfbad..2053cd8e788a 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -231,7 +231,10 @@ typedef struct port { mux_states_t sm_mux_state; /* state machine mux state */ u16 sm_mux_timer_counter; /* state machine mux timer counter */ tx_states_t sm_tx_state; /* state machine tx state */ - u16 sm_tx_timer_counter; /* state machine tx timer counter(allways on - enter to transmit state 3 time per second) */ + u16 sm_tx_timer_counter; /* state machine tx timer counter + * (always on - enter to transmit + * state 3 time per second) + */ u16 sm_churn_actor_timer_counter; u16 sm_churn_partner_timer_counter; u32 churn_actor_count; diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index 9dc082b2d543..e5945427f38d 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -53,7 +53,7 @@ struct slave; struct tlb_client_info { - struct slave *tx_slave; /* A pointer to slave used for transmiting + struct slave *tx_slave; /* A pointer to slave used for transmitting * packets to a Client that the Hash function * gave this entry index. */ -- cgit v1.2.3 From 19f1f11c9a8e92a2211a3854bd2cfbd4bb6303f1 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:27 +0100 Subject: net: qualcomm: rmnet: Correct spelling in if_rmnet.h Correct spelling in if_rmnet.h As reported by codespell. 
Cc: Sean Tranchetti Signed-off-by: Simon Horman Reviewed-by: Subash Abhinov Kasiviswanathan Link: https://patch.msgid.link/20240822-net-spell-v1-6-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/if_rmnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 839d1e48b85e..c44bf6e80ecb 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -42,7 +42,7 @@ struct rmnet_map_ul_csum_header { /* csum_info field: * OFFSET: where (offset in bytes) to insert computed checksum - * UDP: 1 = UDP checksum (zero checkum means no checksum) + * UDP: 1 = UDP checksum (zero checksum means no checksum) * ENABLED: 1 = checksum computation requested */ #define MAP_CSUM_UL_OFFSET_MASK GENMASK(13, 0) -- cgit v1.2.3 From 6899c2549cf7ca554ee2dd8574f0b70945f3ed1b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:28 +0100 Subject: netlabel: Correct spelling in netlabel.h Correct spelling in netlabel.h. As reported by codespell. Cc: Paul Moore Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-7-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netlabel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 654bc777d2a7..529160f76cac 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -30,7 +30,7 @@ struct calipso_doi; /* * NetLabel - A management interface for maintaining network packet label - * mapping tables for explicit packet labling protocols. + * mapping tables for explicit packet labeling protocols. * * Network protocols such as CIPSO and RIPSO require a label translation layer * to convert the label on the packet into something meaningful on the host -- cgit v1.2.3 From 10d0749a38c3a02f308fe8c4f4ed29bd6412e5fb Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:29 +0100 Subject: NFC: Correct spelling in headers Correct spelling in NFC headers. As reported by codespell. Signed-off-by: Simon Horman Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240822-net-spell-v1-8-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/nfc/nci.h | 2 +- include/net/nfc/nfc.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index e82f55f543bb..dc36519d16aa 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -332,7 +332,7 @@ struct nci_core_init_rsp_1 { __le32 nfcc_features; __u8 num_supported_rf_interfaces; __u8 supported_rf_interfaces[]; /* variable size array */ - /* continuted in nci_core_init_rsp_2 */ + /* continued in nci_core_init_rsp_2 */ } __packed; struct nci_core_init_rsp_2 { diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 3d07abacf08b..3a3781838c67 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -80,7 +80,7 @@ struct nfc_ops { #define NFC_ATR_REQ_GT_OFFSET 14 /** - * struct nfc_target - NFC target descriptiom + * struct nfc_target - NFC target description * * @sens_res: 2 bytes describing the target SENS_RES response, if the target * is a type A one. 
The %sens_res most significant byte must be byte 2 @@ -230,10 +230,10 @@ static inline void nfc_set_parent_dev(struct nfc_dev *nfc_dev, } /** - * nfc_set_drvdata - set driver specifc data + * nfc_set_drvdata - set driver specific data * * @dev: The nfc device - * @data: Pointer to driver specifc data + * @data: Pointer to driver specific data */ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data) { @@ -241,7 +241,7 @@ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data) } /** - * nfc_get_drvdata - get driver specifc data + * nfc_get_drvdata - get driver specific data * * @dev: The nfc device */ -- cgit v1.2.3 From a7a45f02a093d3b3f80a77db334703b5f53ebd44 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:30 +0100 Subject: net: sched: Correct spelling in headers Correct spelling in pkt_cls.h and red.h. As reported by codespell. Cc: Krzysztof Kozlowski Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-9-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/pkt_cls.h | 2 +- include/net/red.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 41297bd38dff..4880b3a7aced 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -491,7 +491,7 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *, struct tcf_pkt_info *); /** - * tcf_em_tree_match - evaulate an ematch tree + * tcf_em_tree_match - evaluate an ematch tree * * @skb: socket buffer of the packet in question * @tree: ematch tree to be used for evaluation diff --git a/include/net/red.h b/include/net/red.h index 802287d52c9e..159a09359fc0 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -40,7 +40,7 @@ max_P should be small (not 1), usually 0.01..0.02 is good value. max_P is chosen as a number, so that max_P/(th_max-th_min) - is a negative power of two in order arithmetics to contain + is a negative power of two in order arithmetic to contain only shifts. @@ -159,7 +159,7 @@ static inline u32 red_maxp(u8 Plog) static inline void red_set_vars(struct red_vars *v) { /* Reset average queue length, the value is strictly bound - * to the parameters below, reseting hurts a bit but leaving + * to the parameters below, resetting hurts a bit but leaving * it might result in an unreasonable qavg for a while. --TGR */ v->qavg = 0; @@ -340,7 +340,7 @@ static inline unsigned long red_calc_qavg_no_idle_time(const struct red_parms *p { /* * NOTE: v->qavg is fixed point number with point at Wlog. - * The formula below is equvalent to floating point + * The formula below is equivalent to floating point * version: * * qavg = qavg*(1-W) + backlog*W; @@ -375,7 +375,7 @@ static inline int red_mark_probability(const struct red_parms *p, OK. qR is random number in the interval (0..1/max_P)*(qth_max-qth_min) i.e. 0..(2^Plog). If we used floating point - arithmetics, it would be: (2^Plog)*rnd_num, + arithmetic, it would be: (2^Plog)*rnd_num, where rnd_num is less 1. Taking into account, that qavg have fixed -- cgit v1.2.3 From 7f47fcea8c6b4af25e72e0ebb8d492a181b7ce03 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:31 +0100 Subject: sctp: Correct spelling in headers Correct spelling in sctp.h and structs.h. As reported by codespell. 
Cc: Marcelo Ricardo Leitner Signed-off-by: Simon Horman Acked-by: Xin Long Link: https://patch.msgid.link/20240822-net-spell-v1-10-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/sctp/sctp.h | 2 +- include/net/sctp/structs.h | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a2310fa995f6..84e6b9fd5610 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -28,7 +28,7 @@ #define __net_sctp_h__ /* Header Strategy. - * Start getting some control over the header file depencies: + * Start getting some control over the header file dependencies: * includes * constants * structs diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f24a1bbcb3ef..31248cfdfb23 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -521,7 +521,7 @@ struct sctp_datamsg { refcount_t refcnt; /* When is this message no longer interesting to the peer? */ unsigned long expires_at; - /* Did the messenge fail to send? */ + /* Did the message fail to send? */ int send_error; u8 send_failed:1, can_delay:1, /* should this message be Nagle delayed */ @@ -792,7 +792,7 @@ struct sctp_transport { */ hb_sent:1, - /* Is the Path MTU update pending on this tranport */ + /* Is the Path MTU update pending on this transport */ pmtu_pending:1, dst_pending_confirm:1, /* need to confirm neighbour */ @@ -1223,7 +1223,7 @@ enum sctp_endpoint_type { }; /* - * A common base class to bridge the implmentation view of a + * A common base class to bridge the implementation view of a * socket (usually listening) endpoint versus an association's * local endpoint. * This common structure is useful for several purposes: @@ -1353,7 +1353,7 @@ struct sctp_endpoint { struct rcu_head rcu; }; -/* Recover the outter endpoint structure. */ +/* Recover the outer endpoint structure. */ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) { struct sctp_endpoint *ep; @@ -1906,7 +1906,7 @@ struct sctp_association { __u32 rwnd_over; /* Keeps treack of rwnd pressure. This happens when we have - * a window, but not recevie buffer (i.e small packets). This one + * a window, but not receive buffer (i.e small packets). This one * is releases slowly (1 PMTU at a time ). */ __u32 rwnd_press; @@ -1994,7 +1994,7 @@ struct sctp_association { /* ADDIP Section 5.2 Upon reception of an ASCONF Chunk. * - * This is needed to implement itmes E1 - E4 of the updated + * This is needed to implement items E1 - E4 of the updated * spec. Here is the justification: * * Since the peer may bundle multiple ASCONF chunks toward us, @@ -2005,7 +2005,7 @@ struct sctp_association { /* These ASCONF chunks are waiting to be sent. * - * These chunaks can't be pushed to outqueue until receiving + * These chunks can't be pushed to outqueue until receiving * ASCONF_ACK for the previous ASCONF indicated by * addip_last_asconf, so as to guarantee that only one ASCONF * is in flight at any time. @@ -2059,13 +2059,13 @@ struct sctp_association { struct sctp_transport *new_transport; /* SCTP AUTH: list of the endpoint shared keys. 
These - * keys are provided out of band by the user applicaton + * keys are provided out of band by the user application * and can't change during the lifetime of the association */ struct list_head endpoint_shared_keys; /* SCTP AUTH: - * The current generated assocaition shared key (secret) + * The current generated association shared key (secret) */ struct sctp_auth_bytes *asoc_shared_key; struct sctp_shared_key *shkey; @@ -2121,7 +2121,7 @@ enum { SCTP_ASSOC_EYECATCHER = 0xa550c123, }; -/* Recover the outter association structure. */ +/* Recover the outer association structure. */ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base) { struct sctp_association *asoc; -- cgit v1.2.3 From 01d86846a5a5f34662679dd229a0167fc9d92382 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:32 +0100 Subject: x25: Correct spelling in x25.h Correct spelling in x25.h As reported by codespell. Signed-off-by: Simon Horman Reviewed-by: Martin Schiller Link: https://patch.msgid.link/20240822-net-spell-v1-11-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/x25.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/x25.h b/include/net/x25.h index 597eb53c471e..5e833cfc864e 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -81,7 +81,7 @@ enum { #define X25_DEFAULT_WINDOW_SIZE 2 /* Default Window Size */ #define X25_DEFAULT_PACKET_SIZE X25_PS128 /* Default Packet Size */ -#define X25_DEFAULT_THROUGHPUT 0x0A /* Deafult Throughput */ +#define X25_DEFAULT_THROUGHPUT 0x0A /* Default Throughput */ #define X25_DEFAULT_REVERSE 0x00 /* Default Reverse Charging */ #define X25_SMODULUS 8 -- cgit v1.2.3 From 70d0bb45fae87a3b08970a318e15f317446a1956 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:33 +0100 Subject: net: Correct spelling in headers Correct spelling in Networking headers. As reported by codespell. Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-12-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/etherdevice.h | 2 +- include/linux/netdevice.h | 8 ++++---- include/net/addrconf.h | 2 +- include/net/busy_poll.h | 2 +- include/net/caif/caif_layer.h | 4 ++-- include/net/caif/cfpkt.h | 2 +- include/net/dropreason-core.h | 6 +++--- include/net/dst.h | 2 +- include/net/dst_cache.h | 2 +- include/net/erspan.h | 4 ++-- include/net/hwbm.h | 4 ++-- include/net/llc_pdu.h | 2 +- include/net/netlink.h | 16 ++++++++-------- include/net/netns/sctp.h | 4 ++-- include/net/regulatory.h | 2 +- include/net/sock.h | 4 ++-- include/net/udp.h | 2 +- include/uapi/linux/in.h | 2 +- include/uapi/linux/inet_diag.h | 2 +- 19 files changed, 36 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 0ed47d00549b..30114c25ad12 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -645,7 +645,7 @@ static inline struct ethhdr *eth_skb_pull_mac(struct sk_buff *skb) } /** - * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame + * eth_skb_pad - Pad buffer to minimum number of octets for Ethernet frame * @skb: Buffer to pad * * An Ethernet frame should have a minimum size of 60 bytes. 
This function diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7dfa836d9dbf..fce70990b209 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1237,7 +1237,7 @@ struct netdev_net_notifier { * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid) - * Deletes the FDB entry from dev coresponding to addr. + * Deletes the FDB entry from dev corresponding to addr. * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -3514,7 +3514,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, dql_completed(&dev_queue->dql, bytes); /* - * Without the memory barrier there is a small possiblity that + * Without the memory barrier there is a small possibility that * netdev_tx_sent_queue will miss the update and cause the queue to * be stopped forever */ @@ -4583,7 +4583,7 @@ void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev); /** - * __dev_uc_sync - Synchonize device's unicast list + * __dev_uc_sync - Synchronize device's unicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed @@ -4627,7 +4627,7 @@ void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev); /** - * __dev_mc_sync - Synchonize device's multicast list + * __dev_mc_sync - Synchronize device's multicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed diff --git a/include/net/addrconf.h b/include/net/addrconf.h index c8ed31828db3..363dd63babe7 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -333,7 +333,7 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) /** * __in6_dev_stats_get - get inet6_dev pointer for stats * @dev: network device - * @skb: skb for original incoming interface if neeeded + * @skb: skb for original incoming interface if needed * * Caller must hold rcu_read_lock or RTNL, because this function * does not take a reference on the inet6_dev. diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 9b09acac538e..5e3b5703e4ab 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -131,7 +131,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, #endif } -/* used in the protocol hanlder to propagate the napi_id to the socket */ +/* used in the protocol handler to propagate the napi_id to the socket */ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 0f45d875905f..053e7c6a6a66 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -20,7 +20,7 @@ struct caif_payload_info; * @assert: expression to evaluate. * * This function will print a error message and a do WARN_ON if the - * assertion failes. Normally this will do a stack up at the current location. + * assertion fails. Normally this will do a stack up at the current location. */ #define caif_assert(assert) \ do { \ @@ -116,7 +116,7 @@ enum caif_direction { * @dn: Pointer down to the layer below. * @node: List node used when layer participate in a list. * @receive: Packet receive function. - * @transmit: Packet transmit funciton. 
+ * @transmit: Packet transmit function. * @ctrlcmd: Used for control signalling upwards in the stack. * @modemcmd: Used for control signaling downwards in the stack. * @id: The identity of this layer diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index 44d914a50369..acf664227d96 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -18,7 +18,7 @@ struct cfpkt *cfpkt_create(u16 len); /* * Destroy a CAIF Packet. - * pkt Packet to be destoyed. + * pkt Packet to be destroyed. */ void cfpkt_destroy(struct cfpkt *pkt); diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 9707ab54fdd5..4748680e8c88 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -155,8 +155,8 @@ enum skb_drop_reason { /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ SKB_DROP_REASON_SOCKET_RCVBUFF, /** - * @SKB_DROP_REASON_PROTO_MEM: proto memory limition, such as udp packet - * drop out of udp_memory_allocated. + * @SKB_DROP_REASON_PROTO_MEM: proto memory limitation, such as + * udp packet drop out of udp_memory_allocated. */ SKB_DROP_REASON_PROTO_MEM, /** @@ -217,7 +217,7 @@ enum skb_drop_reason { */ SKB_DROP_REASON_TCP_ZEROWINDOW, /** - * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data reveived is already + * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data received is already * received before (spurious retrans may happened), see * LINUX_MIB_DELAYEDACKLOST */ diff --git a/include/net/dst.h b/include/net/dst.h index 0aa331bd2fdb..0f303cc60252 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -341,7 +341,7 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; /* - * Clear hash so that we can recalulate the hash for the + * Clear hash so that we can recalculate the hash for the * encapsulated packet, unless we have already determine the hash * over the L4 4-tuple. */ diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index b4a55d2d5e71..1961699598e2 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -102,7 +102,7 @@ int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp); * @dst_cache: the cache * * No synchronization is enforced: it must be called only when the cache - * is unsed. + * is unused. */ void dst_cache_destroy(struct dst_cache *dst_cache); diff --git a/include/net/erspan.h b/include/net/erspan.h index 6cb4cbd6a48f..c6209e7b6c96 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -89,7 +89,7 @@ enum erspan_encap_type { ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */ - ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ + ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag preserved in frame */ }; #define ERSPAN_V1_MDSIZE 4 @@ -192,7 +192,7 @@ static inline void erspan_build_header(struct sk_buff *skb, enc_type = ERSPAN_ENCAP_NOVLAN; /* If mirrored packet has vlan tag, extract tci and - * perserve vlan header in the mirrored frame. + * preserve vlan header in the mirrored frame. 
*/ if (eth->h_proto == htons(ETH_P_8021Q)) { qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); diff --git a/include/net/hwbm.h b/include/net/hwbm.h index aa495decec35..bdbe91c609ff 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -11,9 +11,9 @@ struct hwbm_pool { int frag_size; /* Number of buffers currently used by this pool */ int buf_num; - /* constructor called during alocation */ + /* constructor called during allocation */ int (*construct)(struct hwbm_pool *bm_pool, void *buf); - /* protect acces to the buffer counter*/ + /* protect access to the buffer counter*/ struct mutex buf_lock; /* private data */ void *priv; diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 1d55ba7c45be..86681f29bda7 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -254,7 +254,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, } /** - * llc_pdu_decode_sa - extracs source address (MAC) of input frame + * llc_pdu_decode_sa - extracts, source address (MAC) of input frame * @skb: input skb that source address must be extracted from it. * @sa: pointer to source address (6 byte array). * diff --git a/include/net/netlink.h b/include/net/netlink.h index e78ce008e07c..db6af207287c 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -827,7 +827,7 @@ nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen, /** * nlmsg_find_attr - find a specific attribute in a netlink message * @nlh: netlink message header - * @hdrlen: length of familiy specific header + * @hdrlen: length of family specific header * @attrtype: type of attribute to look for * * Returns the first attribute which matches the specified type. @@ -849,7 +849,7 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * * Validates all attributes in the specified attribute stream against the * specified policy. Validation is done in liberal mode. - * See documenation of struct nla_policy for more details. + * See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ @@ -872,7 +872,7 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len, * * Validates all attributes in the specified attribute stream against the * specified policy. Validation is done in strict mode. - * See documenation of struct nla_policy for more details. + * See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. 
*/ @@ -887,7 +887,7 @@ static inline int nla_validate(const struct nlattr *head, int len, int maxtype, /** * nlmsg_validate_deprecated - validate a netlink message including attributes * @nlh: netlinket message header - * @hdrlen: length of familiy specific header + * @hdrlen: length of family specific header * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct @@ -933,7 +933,7 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @nlh: netlink message header - * @hdrlen: length of familiy specific header + * @hdrlen: length of family specific header * @rem: initialized to len, holds bytes currently remaining in stream */ #define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \ @@ -1034,7 +1034,7 @@ static inline struct sk_buff *nlmsg_new_large(size_t payload) * @skb: socket buffer the message is stored in * @nlh: netlink message header * - * Corrects the netlink message header to include the appeneded + * Corrects the netlink message header to include the appended * attributes. Only necessary if attributes have been added to * the message. */ @@ -1954,7 +1954,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) * @start: container attribute * * Corrects the container attribute header to include the all - * appeneded attributes. + * appended attributes. * * Returns the total data length of the skb. */ @@ -1987,7 +1987,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * * Validates all attributes in the nested attribute stream against the * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. + * ignored. See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 7eff3d981b89..d25cd7a9c5ff 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -125,14 +125,14 @@ struct netns_sctp { int pf_expose; /* - * Policy for preforming sctp/socket accounting + * Policy for performing sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf * 1 - do sctp accounting, each asoc may use sk_sndbuf bytes */ int sndbuf_policy; /* - * Policy for preforming sctp/socket accounting + * Policy for performing sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_rcvbuf * 1 - do sctp accounting, each asoc may use sk_rcvbuf bytes */ diff --git a/include/net/regulatory.h b/include/net/regulatory.h index a103f4c8cf75..6633627f6e76 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -121,7 +121,7 @@ struct regulatory_request { * @REGULATORY_DISABLE_BEACON_HINTS: enable this if your driver needs to * ensure that passive scan flags and beaconing flags may not be lifted by * cfg80211 due to regulatory beacon hints. 
For more information on beacon - * hints read the documenation for regulatory_hint_found_beacon() + * hints read the documentation for regulatory_hint_found_beacon() * @REGULATORY_COUNTRY_IE_FOLLOW_POWER: for devices that have a preference * that even though they may have programmed their own custom power * setting prior to wiphy registration, they want to ensure their channel diff --git a/include/net/sock.h b/include/net/sock.h index cce23ac4d514..f51d61fab059 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1624,7 +1624,7 @@ bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); * lock_sock_fast - fast version of lock_sock * @sk: socket * - * This version should be used for very small section, where process wont block + * This version should be used for very small section, where process won't block * return false if fast path is taken: * * sk_lock.slock locked, owned = 0, BH disabled @@ -2546,7 +2546,7 @@ struct sock_skb_cb { /* Store sock_skb_cb at the end of skb->cb[] so protocol families * using skb->cb[] would keep using it directly and utilize its - * alignement guarantee. + * alignment guarantee. */ #define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \ sizeof(struct sock_skb_cb))) diff --git a/include/net/udp.h b/include/net/udp.h index 5ca53b1cec67..61222545ab1c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -232,7 +232,7 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, } /* Since this is being sent on the wire obfuscate hash a bit - * to minimize possbility that any useful information to an + * to minimize possibility that any useful information to an * attacker is leaked. Only upper 16 bits are relevant in the * computation for 16 bit port value. */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index d358add1611c..5d32d53508d9 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -141,7 +141,7 @@ struct in_addr { */ #define IP_PMTUDISC_INTERFACE 4 /* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get - * fragmented if they exeed the interface mtu + * fragmented if they exceed the interface mtu */ #define IP_PMTUDISC_OMIT 5 diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 50655de04c9b..86bb2e8b17c9 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -143,7 +143,7 @@ enum { INET_DIAG_SHUTDOWN, /* - * Next extenstions cannot be requested in struct inet_diag_req_v2: + * Next extensions cannot be requested in struct inet_diag_req_v2: * its field idiag_ext has only 8 bits. */ -- cgit v1.2.3 From cead0b8991713bdeb5b947758dd62287fcf8da40 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 26 Aug 2024 16:09:43 +0530 Subject: nvme: rename apptag and appmask to lbat and lbatm Rename apptag and appmask to lbat and lbatm so that it matches the field names used in NVMe spec. 
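For illustration, a minimal sketch (hypothetical helper, not taken from the patch) of how code that fills the protection-information words reads after the rename; only the two renamed members of struct nvme_rw_command are assumed, as shown in the hunk below:

	static void example_set_pi_tags(struct nvme_rw_command *rw,
					u16 app_tag, u16 app_mask)
	{
		/* formerly rw->apptag / rw->appmask */
		rw->lbat  = cpu_to_le16(app_tag);   /* logical block application tag */
		rw->lbatm = cpu_to_le16(app_mask);  /* logical block application tag mask */
	}
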
Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Suggested-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7b2ae2e43544..b58d9405d65e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -987,8 +987,8 @@ struct nvme_rw_command { __le16 control; __le32 dsmgmt; __le32 reftag; - __le16 apptag; - __le16 appmask; + __le16 lbat; + __le16 lbatm; }; enum { @@ -1057,8 +1057,8 @@ struct nvme_write_zeroes_cmd { __le16 control; __le32 dsmgmt; __le32 reftag; - __le16 apptag; - __le16 appmask; + __le16 lbat; + __le16 lbatm; }; enum nvme_zone_mgmt_action { -- cgit v1.2.3 From 1b9c2e94df1408e567f23641f6052af9a75614d1 Mon Sep 17 00:00:00 2001 From: Muhammad Qasim Abdul Majeed Date: Sun, 4 Aug 2024 17:33:13 +0500 Subject: ACPI: bus: Define and use symbols for device and class name lengths It is better to define symbols for the maximum ACPI device name length and the maximum ACPI class name length instead of using raw numbers in typedef statements. Signed-off-by: Muhammad Qasim Abdul Majeed Link: https://patch.msgid.link/20240804123313.16211-6-qasim.majeed20@gmail.com [ rjw: Subject edits, added a changelog, dropped unrelated change ] Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 8db5bd382915..049cbda28c4c 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -228,10 +228,12 @@ struct acpi_device_dir { /* Plug and Play */ +#define MAX_ACPI_DEVICE_NAME_LEN 40 +#define MAX_ACPI_CLASS_NAME_LEN 20 typedef char acpi_bus_id[8]; typedef u64 acpi_bus_address; -typedef char acpi_device_name[40]; -typedef char acpi_device_class[20]; +typedef char acpi_device_name[MAX_ACPI_DEVICE_NAME_LEN]; +typedef char acpi_device_class[MAX_ACPI_CLASS_NAME_LEN]; struct acpi_hardware_id { struct list_head list; -- cgit v1.2.3 From 50c6dbdfd16e312382842198a7919341ad480e05 Mon Sep 17 00:00:00 2001 From: Max Ramanouski Date: Sun, 25 Aug 2024 01:01:11 +0300 Subject: x86/ioremap: Improve iounmap() address range checks Allowing iounmap() on memory that was not ioremap()'d in the first place is obviously a bad idea. There is currently a feeble attempt to avoid errant iounmap()s by checking to see if the address is below "high_memory". But that's imprecise at best because there are plenty of high addresses that are also invalid to call iounmap() on. Thankfully, there is a more precise helper: is_ioremap_addr(). x86 just does not use it in iounmap(). Restrict iounmap() to addresses in the ioremap region, by using is_ioremap_addr(). This aligns x86 closer to the generic iounmap() implementation. Additionally, add a warning in case there is an attempt to iounmap() invalid memory. This replaces an existing silent return and will help alert folks to any incorrect usage of iounmap(). Due to VMALLOC_START on i386 not being present in asm/pgtable.h, include for asm/vmalloc.h had to be added to include/linux/ioremap.h. 
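The check pattern described above, as a simplified sketch (not the exact arch/x86 implementation; function name and warning text are illustrative only):

	void example_iounmap(volatile void __iomem *addr)
	{
		void *va = (void __force *)addr;

		/* Only addresses inside the ioremap region are valid here. */
		if (WARN(!is_ioremap_addr(va),
			 "iounmap() on invalid address %p\n", va))
			return;

		/* ... tear down the mapping as before ... */
	}
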
[ dhansen: tweak subject and changelog ] Signed-off-by: Max Ramanouski Signed-off-by: Dave Hansen Reviewed-by: Christoph Hellwig Reviewed-by: Alistair Popple Link: https://lore.kernel.org/all/20240824220111.84441-1-max8rr8%40gmail.com --- include/linux/ioremap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ioremap.h b/include/linux/ioremap.h index f0e99fc7dd8b..2bd1661fe9ad 100644 --- a/include/linux/ioremap.h +++ b/include/linux/ioremap.h @@ -4,6 +4,7 @@ #include #include +#include #if defined(CONFIG_HAS_IOMEM) || defined(CONFIG_GENERIC_IOREMAP) /* -- cgit v1.2.3 From 22652022c7eef3c4ad6ab5f13a6dfc7f25f853d4 Mon Sep 17 00:00:00 2001 From: Laurentiu Mihalcea Date: Mon, 26 Aug 2024 14:24:42 -0400 Subject: ASoC: SOF: ipc: replace "enum sof_comp_type" field with "uint32_t" Normally, the type of enums is "unsigned int" or "int". GCC has the "-fshort-enums" option, which instructs the compiler to use the smallest data type that can hold all the values in the enum (i.e: char, short, int or their unsigned variants). According to the GCC documentation, "-fshort-enums" may be default on some targets. This seems to be the case for SOF when built for a certain 32-bit ARM platform. On Linux, this is not the case (tested with "aarch64-linux-gnu-gcc") which means enums such as "enum sof_comp_type" will end up having different sizes on Linux and SOF. Since "enum sof_comp_type" is used in IPC-related structures such as "struct sof_ipc_comp", this means the fields of the structures will end up being placed at different offsets. This, in turn, leads to SOF not being able to properly interpret data passed from Linux. With this in mind, replace "enum sof_comp_type" from "struct sof_ipc_comp" with "uint32_t". Signed-off-by: Laurentiu Mihalcea Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Daniel Baluta Link: https://patch.msgid.link/20240826182442.6191-1-laurentiumihalcea111@gmail.com Signed-off-by: Mark Brown --- include/sound/sof/topology.h | 2 +- include/uapi/sound/sof/abi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/sof/topology.h b/include/sound/sof/topology.h index 3ba086f61983..449e93c25184 100644 --- a/include/sound/sof/topology.h +++ b/include/sound/sof/topology.h @@ -54,7 +54,7 @@ enum sof_comp_type { struct sof_ipc_comp { struct sof_ipc_cmd_hdr hdr; uint32_t id; - enum sof_comp_type type; + uint32_t type; uint32_t pipeline_id; uint32_t core; diff --git a/include/uapi/sound/sof/abi.h b/include/uapi/sound/sof/abi.h index 937ed9408c23..c1b158ec5dab 100644 --- a/include/uapi/sound/sof/abi.h +++ b/include/uapi/sound/sof/abi.h @@ -29,7 +29,7 @@ /* SOF ABI version major, minor and patch numbers */ #define SOF_ABI_MAJOR 3 #define SOF_ABI_MINOR 23 -#define SOF_ABI_PATCH 0 +#define SOF_ABI_PATCH 1 /* SOF ABI version number. Format within 32bit word is MMmmmppp */ #define SOF_ABI_MAJOR_SHIFT 24 -- cgit v1.2.3 From 1bf8012fc6997f2117f6919369cde16659db60e0 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 19 Aug 2024 22:59:45 +0800 Subject: pstore: replace spinlock_t by raw_spinlock_t pstore_dump() is called when both preemption and local IRQ are disabled, and a spinlock is obtained, which is problematic for the RT kernel because in this configuration, spinlocks are sleep locks. Replace the spinlock_t with raw_spinlock_t to avoid sleeping in atomic context. Signed-off-by: Wen Yang Cc: Kees Cook Cc: Tony Luck Cc: Guilherme G. 
Piccoli Cc: linux-hardening@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/20240819145945.61274-1-wen.yang@linux.dev Signed-off-by: Kees Cook --- include/linux/pstore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 638507a3c8ff..fed601053c51 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -182,7 +182,7 @@ struct pstore_info { struct module *owner; const char *name; - spinlock_t buf_lock; + raw_spinlock_t buf_lock; char *buf; size_t bufsize; -- cgit v1.2.3 From 7e8ae8486e4471513e2111aba6ac29f2357bed2a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 26 Aug 2024 10:32:34 -0400 Subject: fs/nfsd: fix update of inode attrs in CB_GETATTR Currently, we copy the mtime and ctime to the in-core inode and then mark the inode dirty. This is fine for certain types of filesystems, but not all. Some require a real setattr to properly change these values (e.g. ceph or reexported NFS). Fix this code to call notify_change() instead, which is the proper way to effect a setattr. There is one problem though: In this case, the client is holding a write delegation and has sent us attributes to update our cache. We don't want to break the delegation for this since that would defeat the purpose. Add a new ATTR_DELEG flag that makes notify_change bypass the try_break_deleg call. Fixes: c5967721e106 ("NFSD: handle GETATTR conflict with write delegation") Reviewed-by: Christian Brauner Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..bafc1d134b94 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -208,6 +208,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) #define ATTR_TOUCH (1 << 17) +#define ATTR_DELEG (1 << 18) /* Delegated attrs. Don't break write delegations */ /* * Whiteout is represented by a char device. The following constants define the -- cgit v1.2.3 From 13acf2b74803a9a9ab3475c306d5814cf3cefbd8 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 14:32:42 +0800 Subject: ata: libata: Remove obsolete function declarations The function ata_schedule_scsi_eh() was removed with commit f8bbfc247efb ("[PATCH] SCSI: make scsi_implement_eh() generic API for SCSI transports"), and the function ata_sff_irq_clear() was removed with commit 37f65b8bc262("libata-sff: ata_sff_irq_clear() is BMDMA specific"). Remove the now useless declarations of these functions in drivers/ata/libata.h and include/linux/libata.h. 
Signed-off-by: Gaosheng Cui Signed-off-by: Damien Le Moal --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0279c0a6302f..6552e90753ae 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -2006,7 +2006,6 @@ extern unsigned int ata_sff_data_xfer(struct ata_queued_cmd *qc, extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); extern void ata_sff_irq_on(struct ata_port *ap); -extern void ata_sff_irq_clear(struct ata_port *ap); extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq); extern void ata_sff_queue_work(struct work_struct *work); -- cgit v1.2.3 From cda1fba15cb2282b3c364805c9767698f11c3b0e Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 23 Aug 2024 00:25:12 +0200 Subject: dpll: add Embedded SYNC feature for a pin Implement and document new pin attributes for providing Embedded SYNC capabilities to the DPLL subsystem users through a netlink pin-get do/dump messages. Allow the user to set Embedded SYNC frequency with pin-set do netlink message. Reviewed-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20240822222513.255179-2-arkadiusz.kubalewski@intel.com Signed-off-by: Jakub Kicinski --- include/linux/dpll.h | 15 +++++++++++++++ include/uapi/linux/dpll.h | 3 +++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index d275736230b3..81f7b623d0ba 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -15,6 +15,7 @@ struct dpll_device; struct dpll_pin; +struct dpll_pin_esync; struct dpll_device_ops { int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv, @@ -83,6 +84,13 @@ struct dpll_pin_ops { int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, s64 *ffo, struct netlink_ext_ack *extack); + int (*esync_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 freq, struct netlink_ext_ack *extack); + int (*esync_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + struct dpll_pin_esync *esync, + struct netlink_ext_ack *extack); }; struct dpll_pin_frequency { @@ -111,6 +119,13 @@ struct dpll_pin_phase_adjust_range { s32 max; }; +struct dpll_pin_esync { + u64 freq; + const struct dpll_pin_frequency *range; + u8 range_num; + u8 pulse; +}; + struct dpll_pin_properties { const char *board_label; const char *panel_label; diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 0c13d7f1a1bc..b0654ade7b7e 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -210,6 +210,9 @@ enum dpll_a_pin { DPLL_A_PIN_PHASE_ADJUST, DPLL_A_PIN_PHASE_OFFSET, DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET, + DPLL_A_PIN_ESYNC_FREQUENCY, + DPLL_A_PIN_ESYNC_FREQUENCY_SUPPORTED, + DPLL_A_PIN_ESYNC_PULSE, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) -- cgit v1.2.3 From 7e8fb2eda9885ea2d13179a4c0bbf810f900ef25 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 22 Jul 2024 22:16:47 -0700 Subject: jbd2: fix kernel-doc for j_transaction_overhead_buffers Use the correct struct member name in the kernel-doc notation to prevent a kernel-doc build warning. 
include/linux/jbd2.h:1303: warning: Function parameter or struct member 'j_transaction_overhead_buffers' not described in 'journal_s' include/linux/jbd2.h:1303: warning: Excess struct member 'j_transaction_overhead' description in 'journal_s' Fixes: e3a00a23781c ("jbd2: precompute number of transaction descriptor blocks") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20240710182252.4c281445@canb.auug.org.au/ Signed-off-by: Randy Dunlap Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240723051647.3053491-1-rdunlap@infradead.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5157d92b6f23..17662eae408f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1086,7 +1086,7 @@ struct journal_s int j_revoke_records_per_block; /** - * @j_transaction_overhead: + * @j_transaction_overhead_buffers: * * Number of blocks each transaction needs for its own bookkeeping */ -- cgit v1.2.3 From fa10db138d205362026daecc8ea65d4e339ade00 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Thu, 1 Aug 2024 09:38:10 +0800 Subject: jbd2: remove unused return value of jbd2_fc_release_bufs Remove unused return value of jbd2_fc_release_bufs. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Reviewed-by: Zhang Yi Link: https://patch.msgid.link/20240801013815.2393869-4-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 17662eae408f..8aef9bb6ad57 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1675,7 +1675,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); -int jbd2_fc_release_bufs(journal_t *journal); +void jbd2_fc_release_bufs(journal_t *journal); /* * is_journal_abort -- cgit v1.2.3 From 5e1c5c5a687bb3351d9b4a3b0ad457f8497b2a0d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 17:27:23 +0200 Subject: ALSA: pcm: Drop PCM vmalloc buffer helpers As the last-standing user of PCM vmalloc buffer helper API took its own buffer management, we can finally drop those API functions, which were leftover after reorganization of ALSA memalloc code. 
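For context, a hedged sketch of what a driver uses instead of the removed helpers (assuming the standard managed-buffer API and SNDRV_DMA_TYPE_VMALLOC, neither of which is introduced by this patch):

	/* Request a core-managed vmalloc()-backed buffer at PCM creation time
	 * instead of calling the removed snd_pcm_lib_alloc_vmalloc_buffer()
	 * from hw_params.
	 */
	static int example_pcm_new(struct snd_pcm *pcm)
	{
		return snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_VMALLOC,
						      NULL, 0, 0);
	}
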
Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807152725.18948-3-tiwai@suse.de --- include/sound/pcm.h | 42 ------------------------------------------ 1 file changed, 42 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 384032b6c59c..732121b934fd 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -1358,48 +1358,6 @@ snd_pcm_set_fixed_buffer_all(struct snd_pcm *pcm, int type, return snd_pcm_set_managed_buffer_all(pcm, type, data, size, 0); } -int _snd_pcm_lib_alloc_vmalloc_buffer(struct snd_pcm_substream *substream, - size_t size, gfp_t gfp_flags); -int snd_pcm_lib_free_vmalloc_buffer(struct snd_pcm_substream *substream); -struct page *snd_pcm_lib_get_vmalloc_page(struct snd_pcm_substream *substream, - unsigned long offset); -/** - * snd_pcm_lib_alloc_vmalloc_buffer - allocate virtual DMA buffer - * @substream: the substream to allocate the buffer to - * @size: the requested buffer size, in bytes - * - * Allocates the PCM substream buffer using vmalloc(), i.e., the memory is - * contiguous in kernel virtual space, but not in physical memory. Use this - * if the buffer is accessed by kernel code but not by device DMA. - * - * Return: 1 if the buffer was changed, 0 if not changed, or a negative error - * code. - */ -static inline int snd_pcm_lib_alloc_vmalloc_buffer - (struct snd_pcm_substream *substream, size_t size) -{ - return _snd_pcm_lib_alloc_vmalloc_buffer(substream, size, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); -} - -/** - * snd_pcm_lib_alloc_vmalloc_32_buffer - allocate 32-bit-addressable buffer - * @substream: the substream to allocate the buffer to - * @size: the requested buffer size, in bytes - * - * This function works like snd_pcm_lib_alloc_vmalloc_buffer(), but uses - * vmalloc_32(), i.e., the pages are allocated from 32-bit-addressable memory. - * - * Return: 1 if the buffer was changed, 0 if not changed, or a negative error - * code. - */ -static inline int snd_pcm_lib_alloc_vmalloc_32_buffer - (struct snd_pcm_substream *substream, size_t size) -{ - return _snd_pcm_lib_alloc_vmalloc_buffer(substream, size, - GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); -} - #define snd_pcm_get_dma_buf(substream) ((substream)->runtime->dma_buffer_p) /** -- cgit v1.2.3 From 9a948c0c8e741776ee6d938b49d34d22034fbb7d Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 14 Aug 2024 11:34:15 +0800 Subject: ceph: Remove unused declarations These functions is never implemented and used. 
Signed-off-by: Yue Haibing Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f66f6aac74f6..d7941478158c 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -449,8 +449,6 @@ extern int ceph_osdc_init(struct ceph_osd_client *osdc, extern void ceph_osdc_stop(struct ceph_osd_client *osdc); extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc); -extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, - struct ceph_msg *msg); extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); -- cgit v1.2.3 From ea63fb71993c56628f323b8268d36f4bbd836a7f Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 25 Jul 2024 12:09:25 +0300 Subject: wifi: mac80211: refactor block ack management code Introduce 'ieee80211_mgmt_ba()' to avoid code duplication between 'ieee80211_send_addba_resp()', 'ieee80211_send_addba_request()', and 'ieee80211_send_delba()', ensure that all related addresses are '__aligned(2)', and prefer convenient 'ether_addr_copy()' over generic 'memcpy()'. No functional changes expected. Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20240725090925.6022-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0a04eaf5343c..9406f687cffb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2487,7 +2487,7 @@ struct ieee80211_link_sta { * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not. */ struct ieee80211_sta { - u8 addr[ETH_ALEN]; + u8 addr[ETH_ALEN] __aligned(2); u16 aid; u16 max_rx_aggregation_subframes; bool wme; -- cgit v1.2.3 From e7a7ef9a0742dbd0818d5b15fba2c5313ace765b Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 29 Jul 2024 15:48:16 +0800 Subject: wifi: mac80211: don't use rate mask for offchannel TX either Like the commit ab9177d83c04 ("wifi: mac80211: don't use rate mask for scanning"), ignore incorrect settings to avoid no supported rate warning reported by syzbot. The syzbot did bisect and found cause is commit 9df66d5b9f45 ("cfg80211: fix default HE tx bitrate mask in 2G band"), which however corrects bitmask of HE MCS and recognizes correctly settings of empty legacy rate plus HE MCS rate instead of returning -EINVAL. As suggestions [1], follow the change of SCAN TX to consider this case of offchannel TX as well. 
[1] https://lore.kernel.org/linux-wireless/6ab2dc9c3afe753ca6fdcdd1421e7a1f47e87b84.camel@sipsolutions.net/T/#m2ac2a6d2be06a37c9c47a3d8a44b4f647ed4f024 Reported-by: syzbot+8dd98a9e98ee28dc484a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-wireless/000000000000fdef8706191a3f7b@google.com/ Fixes: 9df66d5b9f45 ("cfg80211: fix default HE tx bitrate mask in 2G band") Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240729074816.20323-1-pkshih@realtek.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9406f687cffb..de50dc8712c0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -994,8 +994,9 @@ enum mac80211_tx_info_flags { * of their QoS TID or other priority field values. * @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally * for sequence number assignment - * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted - * due to scanning, not in normal operation on the interface. + * @IEEE80211_TX_CTRL_DONT_USE_RATE_MASK: Don't use rate mask for this frame + * which is transmitted due to scanning or offchannel TX, not in normal + * operation on the interface. * @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this * frame should be transmitted on the specific link. This really is * only relevant for frames that do not have data present, and is @@ -1016,7 +1017,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9), - IEEE80211_TX_CTRL_SCAN_TX = BIT(10), + IEEE80211_TX_CTRL_DONT_USE_RATE_MASK = BIT(10), IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000, }; -- cgit v1.2.3 From 3a3d1afd25ea2e142c78b67fb769df5be7d308c3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 5 Aug 2024 08:40:37 +0200 Subject: wifi: lib80211: Handle const struct lib80211_crypto_ops in lib80211 lib80211_register_crypto_ops() and lib80211_unregister_crypto_ops() don't modify their "struct lib80211_crypto_ops *ops" argument. So, it can be declared as const. Doing so, some adjustments are needed to also constify some date in "struct lib80211_crypt_data", "struct lib80211_crypto_alg" and the return value of lib80211_get_crypto_ops(). 
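A minimal sketch of what the constified API permits (hypothetical backend; only the register call and the .name member are taken from the header):

	/* The ops table can now live in read-only memory. */
	static const struct lib80211_crypto_ops example_crypto_ops = {
		.name = "EXAMPLE",
		/* .init, .deinit, .encrypt_mpdu, ... filled in as before */
	};

	static int __init example_crypto_init(void)
	{
		return lib80211_register_crypto_ops(&example_crypto_ops);
	}
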
Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://patch.msgid.link/c74085e02f33a11327582b19c9f51c3236e85ae2.1722839425.git.christophe.jaillet@wanadoo.fr Signed-off-by: Johannes Berg --- include/net/lib80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/lib80211.h b/include/net/lib80211.h index 8b47d3a51cf8..fd0f15d87d80 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -92,7 +92,7 @@ struct lib80211_crypto_ops { struct lib80211_crypt_data { struct list_head list; /* delayed deletion list */ - struct lib80211_crypto_ops *ops; + const struct lib80211_crypto_ops *ops; void *priv; atomic_t refcnt; }; @@ -113,9 +113,9 @@ struct lib80211_crypt_info { int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, spinlock_t *lock); void lib80211_crypt_info_free(struct lib80211_crypt_info *info); -int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops); -int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops); -struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); +int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops); +int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops); +const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, struct lib80211_crypt_data **crypt); -- cgit v1.2.3 From 7c3b69eadea9e57c28bf914b0fd70f268f3682e1 Mon Sep 17 00:00:00 2001 From: Rory Little Date: Mon, 5 Aug 2024 17:40:23 -0700 Subject: wifi: mac80211: Add non-atomic station iterator Drivers may at times want to iterate their stations with a function which requires some non-atomic operations. ieee80211_iterate_stations_mtx() introduces an API to iterate stations while holding that wiphy's mutex. This allows the iterating function to do non-atomic operations safely. Signed-off-by: Rory Little Link: https://patch.msgid.link/20240806004024.2014080-2-rory@candelatech.com [unify internal list iteration functions] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index de50dc8712c0..cfd64acdc039 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6243,6 +6243,24 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, void (*iterator)(void *data, struct ieee80211_sta *sta), void *data); + +/** + * ieee80211_iterate_stations_mtx - iterate stations + * + * This function iterates over all stations associated with a given + * hardware that are currently uploaded to the driver and calls the callback + * function for them. This version can only be used while holding the wiphy + * mutex. 
+ * + * @hw: the hardware struct of which the interfaces should be iterated over + * @iterator: the iterator function to call + * @data: first argument of the iterator function + */ +void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data); + /** * ieee80211_queue_work - add work onto the mac80211 workqueue * -- cgit v1.2.3 From 373d3f8dcbb1c0d764123653ca4574be636b8d86 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sun, 11 Aug 2024 12:47:26 +0800 Subject: wifi: rfkill: Correct parameter type for rfkill_set_hw_state_reason() Change type of parameter @reason to enum rfkill_hard_block_reasons for API rfkill_set_hw_state_reason() according to its comments, and all kernel callers have invoked the API with enum type actually. Signed-off-by: Zijun Hu Link: https://patch.msgid.link/20240811-rfkill_fix-v2-1-9050760336f4@quicinc.com Signed-off-by: Johannes Berg --- include/linux/rfkill.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 373003ace639..997b34197385 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -147,7 +147,8 @@ void rfkill_destroy(struct rfkill *rfkill); * Prefer to use rfkill_set_hw_state if you don't need any special reason. */ bool rfkill_set_hw_state_reason(struct rfkill *rfkill, - bool blocked, unsigned long reason); + bool blocked, + enum rfkill_hard_block_reasons reason); /** * rfkill_set_hw_state - Set the internal rfkill hardware block state * @rfkill: pointer to the rfkill class to modify. @@ -280,7 +281,7 @@ static inline void rfkill_destroy(struct rfkill *rfkill) static inline bool rfkill_set_hw_state_reason(struct rfkill *rfkill, bool blocked, - unsigned long reason) + enum rfkill_hard_block_reasons reason) { return blocked; } -- cgit v1.2.3 From 53bc1b73b67836ac9867f93dee7a443986b4a94f Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Thu, 22 Aug 2024 09:42:54 +0800 Subject: wifi: mac80211: export ieee80211_purge_tx_queue() for drivers Drivers need to purge TX SKB when stopping. Using skb_queue_purge() can't report TX status to mac80211, causing ieee80211_free_ack_frame() warns "Have pending ack frames!". Export ieee80211_purge_tx_queue() for drivers to not have to reimplement it. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240822014255.10211-1-pkshih@realtek.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cfd64acdc039..adfec877f392 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3181,6 +3181,19 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, */ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); +/** + * ieee80211_purge_tx_queue - purge TX skb queue + * @hw: the hardware + * @skbs: the skbs + * + * Free a set of transmit skbs. Use this function when device is going to stop + * but some transmit skbs without TX status are still queued. + * This function does not take the list lock and the caller must hold the + * relevant locks to use it. 
+ */ +void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, + struct sk_buff_head *skbs); + /** * DOC: Hardware crypto acceleration * -- cgit v1.2.3 From 21b91d40575fb64f3f280f6c3af586e32a704a92 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 8 Aug 2024 22:05:54 +0800 Subject: efi: Remove unused declaration efi_initialize_iomem_resources() Since commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture"), this is not used anymore. Signed-off-by: Yue Haibing Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 6bf3c4fe8511..e28d88066033 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -764,8 +764,6 @@ extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern int __efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); -extern void efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource, struct resource *bss_resource); extern u64 efi_get_fdt_params(struct efi_memory_map_data *data); extern struct kobject *efi_kobj; -- cgit v1.2.3 From 2955ae8186c8a6f029e429f7890e0c7e5f6e215e Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 1 Aug 2024 20:10:51 -0700 Subject: drm/i915: ARL requires a newer GSC firmware ARL and MTL share a single GSC firmware blob. However, ARL requires a newer version of it. So add differentiate of the PCI ids for ARL from MTL and create ARL as a sub-platform of MTL. That way, all the existing workarounds and such still treat ARL as MTL exactly as before. However, now the GSC code can check for ARL and do an extra version check on the firmware before committing to it. Also, the version extraction code has various ways of failing but the return code was being ignore and so the firmware load would attempt to continue anyway. Fix that by propagating the return code to the next level out. Signed-off-by: John Harrison Fixes: 213c43676beb ("drm/i915/mtl: Remove the 'force_probe' requirement for Meteor Lake") Reviewed-by: Daniele Ceraolo Spurio Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240802031051.3816392-1-John.C.Harrison@Intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 67733d7a71503fd3e32eeada371f8aa2516c5c95) Signed-off-by: Joonas Lahtinen --- include/drm/intel/i915_pciids.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index b21374f76df2..2bf03ebfcf73 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -772,15 +772,18 @@ INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) /* MTL */ +#define INTEL_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + #define INTEL_MTL_IDS(MACRO__, ...) 
\ + INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) /* LNL */ -- cgit v1.2.3 From 2b55d6a42d14c8675e38d6d9adca3014fdf01951 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 20 Aug 2024 17:59:35 +0200 Subject: rcu/kvfree: Add kvfree_rcu_barrier() API Add a kvfree_rcu_barrier() function. It waits until all in-flight pointers are freed over RCU machinery. It does not wait any GP completion and it is within its right to return immediately if there are no outstanding pointers. This function is useful when there is a need to guarantee that a memory is fully freed before destroying memory caches. For example, during unloading a kernel module. Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Vlastimil Babka --- include/linux/rcutiny.h | 5 +++++ include/linux/rcutree.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index d9ac7b136aea..522123050ff8 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -111,6 +111,11 @@ static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr) kvfree(ptr); } +static inline void kvfree_rcu_barrier(void) +{ + rcu_barrier(); +} + #ifdef CONFIG_KASAN_GENERIC void kvfree_call_rcu(struct rcu_head *head, void *ptr); #else diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 254244202ea9..58e7db80f3a8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,6 +35,7 @@ static inline void rcu_virt_note_context_switch(void) void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); +void kvfree_rcu_barrier(void); void rcu_barrier(void); void rcu_momentary_dyntick_idle(void); -- cgit v1.2.3 From b3c34245756adada8a50bdaedbb3965b071c7b0a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 9 Aug 2024 17:36:55 +0200 Subject: kasan: catch invalid free before SLUB reinitializes the object Currently, when KASAN is combined with init-on-free behavior, the initialization happens before KASAN's "invalid free" checks. More importantly, a subsequent commit will want to RCU-delay the actual SLUB freeing of an object, and we'd like KASAN to still validate synchronously that freeing the object is permitted. (Otherwise this change will make the existing testcase kmem_cache_invalid_free fail.) So add a new KASAN hook that allows KASAN to pre-validate a kmem_cache_free() operation before SLUB actually starts modifying the object or its metadata. 
Inside KASAN, this: - moves checks from poison_slab_object() into check_slab_allocation() - moves kasan_arch_is_ready() up into callers of poison_slab_object() - removes "ip" argument of poison_slab_object() and __kasan_slab_free() (since those functions no longer do any reporting) Acked-by: Vlastimil Babka #slub Reviewed-by: Andrey Konovalov Signed-off-by: Jann Horn Signed-off-by: Vlastimil Babka --- include/linux/kasan.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 70d6a8f6e25d..1570c7191176 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -175,13 +175,55 @@ static __always_inline void * __must_check kasan_init_slab_obj( return (void *)object; } -bool __kasan_slab_free(struct kmem_cache *s, void *object, - unsigned long ip, bool init); +bool __kasan_slab_pre_free(struct kmem_cache *s, void *object, + unsigned long ip); +/** + * kasan_slab_pre_free - Check whether freeing a slab object is safe. + * @object: Object to be freed. + * + * This function checks whether freeing the given object is safe. It may + * check for double-free and invalid-free bugs and report them. + * + * This function is intended only for use by the slab allocator. + * + * @Return true if freeing the object is unsafe; false otherwise. + */ +static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s, + void *object) +{ + if (kasan_enabled()) + return __kasan_slab_pre_free(s, object, _RET_IP_); + return false; +} + +bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init); +/** + * kasan_slab_free - Poison, initialize, and quarantine a slab object. + * @object: Object to be freed. + * @init: Whether to initialize the object. + * + * This function informs that a slab object has been freed and is not + * supposed to be accessed anymore, except for objects in + * SLAB_TYPESAFE_BY_RCU caches. + * + * For KASAN modes that have integrated memory initialization + * (kasan_has_integrated_init() == true), this function also initializes + * the object's memory. For other modes, the @init argument is ignored. + * + * This function might also take ownership of the object to quarantine it. + * When this happens, KASAN will defer freeing the object to a later + * stage and handle it internally until then. The return value indicates + * whether KASAN took ownership of the object. + * + * This function is intended only for use by the slab allocator. + * + * @Return true if KASAN took ownership of the object; false otherwise. + */ static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) { if (kasan_enabled()) - return __kasan_slab_free(s, object, _RET_IP_, init); + return __kasan_slab_free(s, object, init); return false; } @@ -371,6 +413,12 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache, { return (void *)object; } + +static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object) +{ + return false; +} + static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) { return false; -- cgit v1.2.3 From b8c8ba73c68bb3c3e9dad22f488b86c540c839f9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 9 Aug 2024 17:36:56 +0200 Subject: slub: Introduce CONFIG_SLUB_RCU_DEBUG Currently, KASAN is unable to catch use-after-free in SLAB_TYPESAFE_BY_RCU slabs because use-after-free is allowed within the RCU grace period by design. 
Add a SLUB debugging feature which RCU-delays every individual kmem_cache_free() before either actually freeing the object or handing it off to KASAN, and change KASAN to poison freed objects as normal when this option is enabled. For now I've configured Kconfig.debug to default-enable this feature in the KASAN GENERIC and SW_TAGS modes; I'm not enabling it by default in HW_TAGS mode because I'm not sure if it might have unwanted performance degradation effects there. Note that this is mostly useful with KASAN in the quarantine-based GENERIC mode; SLAB_TYPESAFE_BY_RCU slabs are basically always also slabs with a ->ctor, and KASAN's assign_tag() currently has to assign fixed tags for those, reducing the effectiveness of SW_TAGS/HW_TAGS mode. (A possible future extension of this work would be to also let SLUB call the ->ctor() on every allocation instead of only when the slab page is allocated; then tag-based modes would be able to assign new tags on every reallocation.) Tested-by: syzbot+263726e59eab6b442723@syzkaller.appspotmail.com Reviewed-by: Andrey Konovalov Acked-by: Marco Elver Acked-by: Vlastimil Babka #slab Signed-off-by: Jann Horn Signed-off-by: Vlastimil Babka --- include/linux/kasan.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 1570c7191176..00a3bf7c0d8f 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -196,15 +196,18 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s, return false; } -bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init); +bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, + bool still_accessible); /** * kasan_slab_free - Poison, initialize, and quarantine a slab object. * @object: Object to be freed. * @init: Whether to initialize the object. + * @still_accessible: Whether the object contents are still accessible. * * This function informs that a slab object has been freed and is not - * supposed to be accessed anymore, except for objects in - * SLAB_TYPESAFE_BY_RCU caches. + * supposed to be accessed anymore, except when @still_accessible is set + * (indicating that the object is in a SLAB_TYPESAFE_BY_RCU cache and an RCU + * grace period might not have passed yet). * * For KASAN modes that have integrated memory initialization * (kasan_has_integrated_init() == true), this function also initializes @@ -220,10 +223,11 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init); * @Return true if KASAN took ownership of the object; false otherwise. 
*/ static __always_inline bool kasan_slab_free(struct kmem_cache *s, - void *object, bool init) + void *object, bool init, + bool still_accessible) { if (kasan_enabled()) - return __kasan_slab_free(s, object, init); + return __kasan_slab_free(s, object, init, still_accessible); return false; } @@ -419,7 +423,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object) return false; } -static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) +static inline bool kasan_slab_free(struct kmem_cache *s, void *object, + bool init, bool still_accessible) { return false; } -- cgit v1.2.3 From d8ea645d6984c84a87032063a0941f15a323831f Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Sun, 18 Aug 2024 21:47:26 -0700 Subject: RDMA/bnxt_re: Handle variable WQE support for user applications User library calculates the number of slots required for user applications and it can pass that information to the driver. Driver can use this value and update the HW directly. This mechanism is currently used only for the newly introduced variable size WQEs. Extend the bnxt_re_qp_req structure to pass the Send Queue slot count. Reorganize the code to get the sq_slots before initializing the Send Queue attributes. Link: https://patch.msgid.link/r/1724042847-1481-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/bnxt_re-abi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index e61104f35d73..71140618700a 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -118,10 +118,16 @@ struct bnxt_re_resize_cq_req { __aligned_u64 cq_va; }; +enum bnxt_re_qp_mask { + BNXT_RE_QP_REQ_MASK_VAR_WQE_SQ_SLOTS = 0x1, +}; + struct bnxt_re_qp_req { __aligned_u64 qpsva; __aligned_u64 qprva; __aligned_u64 qp_handle; + __aligned_u64 comp_mask; + __u32 sq_slots; }; struct bnxt_re_qp_resp { -- cgit v1.2.3 From 10a104c0debbb19a1e45193d5670510216e339ff Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Sun, 18 Aug 2024 21:47:27 -0700 Subject: RDMA/bnxt_re: Enable variable size WQEs for user space applications Add backward compatibility code to enable variable size WQEs only if the user lib supports it. Link: https://patch.msgid.link/r/1724042847-1481-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/bnxt_re-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index 71140618700a..6821002931c8 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -66,6 +66,7 @@ enum bnxt_re_wqe_mode { enum { BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT = 0x01, + BNXT_RE_COMP_MASK_REQ_UCNTX_VAR_WQE_SUPPORT = 0x02, }; struct bnxt_re_uctx_req { -- cgit v1.2.3 From a9b8f337ea4eb66e4980f90bb73e8786e56cacfd Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:14 +0530 Subject: ACPI: scan: Add a weak arch_sort_irqchip_probe() to order the IRQCHIP probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike OF framework, the irqchip probe using IRQCHIP_ACPI_DECLARE has no order defined. Depending on the Makefile is not a good idea. 
So, usually it is worked around by mandating only root interrupt controller probed using IRQCHIP_ACPI_DECLARE and other interrupt controllers are probed via cascade mechanism. However, this is also not a clean solution because if there are multiple root controllers (ex: RINTC in RISC-V which is per CPU) which need to be probed first, then the cascade will happen for every root controller. So, introduce an architecture specific weak function arch_sort_irqchip_probe() to order the probing of the interrupt controllers which can be implemented by different architectures as per their interrupt controller hierarchy. Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-3-sunilvl@ventanamicro.com Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0687a442fec7..3fff86f95c2f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1343,6 +1343,8 @@ struct acpi_probe_entry { kernel_ulong_t driver_data; }; +void arch_sort_irqchip_probe(struct acpi_probe_entry *ap_head, int nr); + #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ -- cgit v1.2.3 From f7d7ccf92f2b9398781f791b4af1a74a9f65b5c3 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:15 +0530 Subject: ACPI: bus: Add acpi_riscv_init() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new function for RISC-V to do architecture specific initialization similar to acpi_arm_init(). Some of the ACPI tables are architecture specific and there is no reason trying to find them on other architectures. So, add acpi_riscv_init() similar to acpi_arm_init(). Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-4-sunilvl@ventanamicro.com Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3fff86f95c2f..892025d873f0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1531,6 +1531,12 @@ void acpi_arm_init(void); static inline void acpi_arm_init(void) { } #endif +#ifdef CONFIG_RISCV +void acpi_riscv_init(void); +#else +static inline void acpi_riscv_init(void) { } +#endif + #ifdef CONFIG_ACPI_PCC void acpi_init_pcc(void); #else -- cgit v1.2.3 From 76d749c58f4c6dd333cb8ea515373f7c8df96e78 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:16 +0530 Subject: ACPI: scan: Refactor dependency creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some architectures like RISC-V will use implicit dependencies like GSI map to create dependencies between interrupt controller and devices. To support doing that, the function which creates the dependency, is refactored bit and made public so that dependency can be added from outside of scan.c as well. Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-5-sunilvl@ventanamicro.com Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpi_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 8db5bd382915..d6a4dd58e36f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -993,6 +993,7 @@ static inline void acpi_put_acpi_dev(struct acpi_device *adev) int acpi_wait_for_acpi_ipmi(void); +int acpi_scan_add_dep(acpi_handle handle, struct acpi_handle_list *dep_devices); #else /* CONFIG_ACPI */ static inline int register_acpi_bus_type(void *bus) { return 0; } -- cgit v1.2.3 From 8cf252737b418c311d1016b0a7956407a235bb8d Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:18 +0530 Subject: ACPI: scan: Define weak function to populate dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some architectures like RISC-V need to add dependencies without explicit _DEP. Define a weak function which can be implemented by the architecture. Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-7-sunilvl@ventanamicro.com Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index d6a4dd58e36f..af72a5d9de99 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -994,6 +994,7 @@ static inline void acpi_put_acpi_dev(struct acpi_device *adev) int acpi_wait_for_acpi_ipmi(void); int acpi_scan_add_dep(acpi_handle handle, struct acpi_handle_list *dep_devices); +u32 arch_acpi_add_auto_dep(acpi_handle handle); #else /* CONFIG_ACPI */ static inline int register_acpi_bus_type(void *bus) { return 0; } -- cgit v1.2.3 From 21734d29f84ad610dbafdca5f4c1bf7ecceeb2fb Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:19 +0530 Subject: ACPI: bus: Add RINTC IRQ model for RISC-V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the IRQ model for RISC-V INTC so that acpi_set_irq_model can use this for RISC-V. Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-8-sunilvl@ventanamicro.com Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 892025d873f0..3a21f1cf126f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -107,6 +107,7 @@ enum acpi_irq_model_id { ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, ACPI_IRQ_MODEL_LPIC, + ACPI_IRQ_MODEL_RINTC, ACPI_IRQ_MODEL_COUNT }; -- cgit v1.2.3 From fbe826b1c10699a70b81a441fe47b817d1019f37 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:27 +0530 Subject: irqchip/riscv-imsic: Add ACPI support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RISC-V IMSIC interrupt controller provides IPI and MSI support. Currently, DT based drivers setup the IPI feature early during boot but defer setting up the MSI functionality. However, in ACPI systems, PCI subsystem is probed early and assume MSI controller is already setup. Hence, both IPI and MSI features need to be initialized early itself. Signed-off-by: Sunil V L Reviewed-by: Anup Patel Tested-by: Björn Töpel Acked-by: Thomas Gleixner Link: https://patch.msgid.link/20240812005929.113499-16-sunilvl@ventanamicro.com Signed-off-by: Rafael J. 
Wysocki --- include/linux/irqchip/riscv-imsic.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/riscv-imsic.h b/include/linux/irqchip/riscv-imsic.h index faf0b800b1b0..7494952c5518 100644 --- a/include/linux/irqchip/riscv-imsic.h +++ b/include/linux/irqchip/riscv-imsic.h @@ -8,6 +8,8 @@ #include #include +#include +#include #include #define IMSIC_MMIO_PAGE_SHIFT 12 @@ -84,4 +86,11 @@ static inline const struct imsic_global_config *imsic_get_global_config(void) #endif +#ifdef CONFIG_ACPI +int imsic_platform_acpi_probe(struct fwnode_handle *fwnode); +struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev); +#else +static inline struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev) { return NULL; } +#endif + #endif -- cgit v1.2.3 From a707f85d47cad3978bef8fe90c7a79e3998e9d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:17 +0200 Subject: HID: bpf: constify parameter rdesc of call_hid_bpf_rdesc_fixup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter is never modified, so mark it as const. Also inline the return statement to avoid a type mismatch error. This is a prerequisite for constification changes in the HID core. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-1-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index d4d063cf63b5..6a47223e6460 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -212,7 +212,7 @@ int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); int hid_bpf_device_init(struct hid_device *hid); -u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size); +u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt, -- cgit v1.2.3 From 6737769ca0b6bfc82df53db5fd69068902d608db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:18 +0200 Subject: HID: constify parameter rdesc of hid_parse_report() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter is never modified, so mark it as const. This is a prerequisite for constification changes in the HID core. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-2-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 1533c9dcd3a6..e7a5d6f2f2eb 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -953,7 +953,7 @@ struct hid_device *hid_allocate_device(void); struct hid_report *hid_register_report(struct hid_device *device, enum hid_report_type type, unsigned int id, unsigned int application); -int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); +int hid_parse_report(struct hid_device *hid, const __u8 *start, unsigned size); struct hid_report *hid_validate_values(struct hid_device *hid, enum hid_report_type type, unsigned int id, unsigned int field_index, -- cgit v1.2.3 From 24ddd0d7de7aaf50f537fd727f31616cb5a65a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:19 +0200 Subject: HID: constify hid_device::rdesc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once a report descriptor has been created by the HID core it is not supposed to be modified anymore. Enforce this invariant through the type system. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-3-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index e7a5d6f2f2eb..dda34d8cad19 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -602,7 +602,7 @@ struct hid_ll_driver; struct hid_device { /* device report descriptor */ __u8 *dev_rdesc; unsigned dev_rsize; - __u8 *rdesc; + const __u8 *rdesc; unsigned rsize; struct hid_collection *collection; /* List of HID collections */ unsigned collection_size; /* Number of allocated hid_collections */ -- cgit v1.2.3 From 80cfb508f3fe4c7c6a567fc4aa863c9a38709cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:20 +0200 Subject: HID: constify params and return value of fetch_item() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fetch_item() does not modify the descriptor it operates on. As a prerequisite for the constification of hid_driver::dev_rdesc, mark the parameters and return value of fetch_item() as const. Also adapt the variable types in the callers to match this constification. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-4-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index dda34d8cad19..502bbc6f078c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -46,7 +46,7 @@ struct hid_item { __s16 s16; __u32 u32; __s32 s32; - __u8 *longdata; + const __u8 *longdata; } data; }; -- cgit v1.2.3 From 3593630c89d7d3963c42262694f8aa8b4727a0ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:21 +0200 Subject: HID: constify hid_device::dev_rdesc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once a report descriptor has been created by the HID core it is not supposed to be modified anymore. Enforce this invariant through the type system. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-5-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 502bbc6f078c..c5fb43db0f2e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -600,7 +600,7 @@ struct hid_driver; struct hid_ll_driver; struct hid_device { /* device report descriptor */ - __u8 *dev_rdesc; + const __u8 *dev_rdesc; unsigned dev_rsize; const __u8 *rdesc; unsigned rsize; -- cgit v1.2.3 From fe73965d078670406acee0218f118c0870d6a58b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:22 +0200 Subject: HID: change return type of report_fixup() to const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By allowing the drivers to return a "const *" they can constify their static report arrays. This makes it clear to driver authors that the HID core will not modify those reports and they can be reused for multiple devices. Furthermore security is slightly improved as those reports are protected against accidental or malicious modifications. [bentiss: fixup hid-cougar.c and hid-multitouch.c for latest version of the master branch] Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-6-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index c5fb43db0f2e..3a998fa3812d 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -822,7 +822,7 @@ struct hid_driver { struct hid_usage *usage, __s32 value); void (*report)(struct hid_device *hdev, struct hid_report *report); - __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, + const __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, unsigned int *size); int (*input_mapping)(struct hid_device *hdev, -- cgit v1.2.3 From 70c261d500951cf3ea0fcf32651aab9a65a91471 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 26 Aug 2024 15:03:23 +0200 Subject: netfilter: nf_tables_ipv6: consider network offset in netdev/egress validation From netdev/egress, skb->len can include the ethernet header, therefore, subtract network offset from skb->len when validating IPv6 packet length. 
Fixes: 42df6e1d221d ("netfilter: Introduce egress hook") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 467d59b9e533..a0633eeaec97 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -31,8 +31,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) struct ipv6hdr *ip6h, _ip6h; unsigned int thoff = 0; unsigned short frag_off; + u32 pkt_len, skb_len; int protohdr; - u32 pkt_len; ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), sizeof(*ip6h), &_ip6h); @@ -43,7 +43,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) return -1; pkt_len = ntohs(ip6h->payload_len); - if (pkt_len + sizeof(*ip6h) > pkt->skb->len) + skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + if (pkt_len + sizeof(*ip6h) > skb_len) return -1; protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); -- cgit v1.2.3 From ba7e053ec89f61be9d27bfb244de52849b5138aa Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Fri, 16 Aug 2024 10:19:09 +0200 Subject: power: supply: max77693: Expose input current limit and CC current properties There are two charger current limit registers: - Fast charge current limit (which controls current going from the charger to the battery); - CHGIN input current limit (which controls current going into the charger through the cable). Add the necessary functions to retrieve the CHGIN input limit (from CHARGER regulator) and maximum fast charge current values, and expose them as power supply properties. Tested-by: Henrik Grimler Signed-off-by: Artur Weber Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240816-max77693-charger-extcon-v4-3-050a0a9bfea0@gmail.com Signed-off-by: Sebastian Reichel --- include/linux/mfd/max77693-private.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 54444ff2a5de..20c5e02ed9da 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -217,6 +217,10 @@ enum max77693_charger_battery_state { #define CHG_CNFG_01_CHGRSTRT_MASK (0x3 << CHG_CNFG_01_CHGRSTRT_SHIFT) #define CHG_CNFG_01_PQEN_MAKS BIT(CHG_CNFG_01_PQEN_SHIFT) +/* MAX77693_CHG_REG_CHG_CNFG_02 register */ +#define CHG_CNFG_02_CC_SHIFT 0 +#define CHG_CNFG_02_CC_MASK 0x3F + /* MAX77693_CHG_REG_CHG_CNFG_03 register */ #define CHG_CNFG_03_TOITH_SHIFT 0 #define CHG_CNFG_03_TOTIME_SHIFT 3 @@ -244,6 +248,7 @@ enum max77693_charger_battery_state { #define CHG_CNFG_12_VCHGINREG_MASK (0x3 << CHG_CNFG_12_VCHGINREG_SHIFT) /* MAX77693 CHG_CNFG_09 Register */ +#define CHG_CNFG_09_CHGIN_ILIM_SHIFT 0 #define CHG_CNFG_09_CHGIN_ILIM_MASK 0x7F /* MAX77693 CHG_CTRL Register */ -- cgit v1.2.3 From 2aeeef906d5a526dc60cf4af92eda69836c39b1f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 23 Aug 2024 06:10:56 +0300 Subject: bonding: change ipsec_lock from spin lock to mutex In the cited commit, bond->ipsec_lock is added to protect ipsec_list, hence xdo_dev_state_add and xdo_dev_state_delete are called inside this lock. As ipsec_lock is a spin lock and such xfrmdev ops may sleep, "scheduling while atomic" will be triggered when changing bond's active slave. 
[ 101.055189] BUG: scheduling while atomic: bash/902/0x00000200 [ 101.055726] Modules linked in: [ 101.058211] CPU: 3 PID: 902 Comm: bash Not tainted 6.9.0-rc4+ #1 [ 101.058760] Hardware name: [ 101.059434] Call Trace: [ 101.059436] [ 101.060873] dump_stack_lvl+0x51/0x60 [ 101.061275] __schedule_bug+0x4e/0x60 [ 101.061682] __schedule+0x612/0x7c0 [ 101.062078] ? __mod_timer+0x25c/0x370 [ 101.062486] schedule+0x25/0xd0 [ 101.062845] schedule_timeout+0x77/0xf0 [ 101.063265] ? asm_common_interrupt+0x22/0x40 [ 101.063724] ? __bpf_trace_itimer_state+0x10/0x10 [ 101.064215] __wait_for_common+0x87/0x190 [ 101.064648] ? usleep_range_state+0x90/0x90 [ 101.065091] cmd_exec+0x437/0xb20 [mlx5_core] [ 101.065569] mlx5_cmd_do+0x1e/0x40 [mlx5_core] [ 101.066051] mlx5_cmd_exec+0x18/0x30 [mlx5_core] [ 101.066552] mlx5_crypto_create_dek_key+0xea/0x120 [mlx5_core] [ 101.067163] ? bonding_sysfs_store_option+0x4d/0x80 [bonding] [ 101.067738] ? kmalloc_trace+0x4d/0x350 [ 101.068156] mlx5_ipsec_create_sa_ctx+0x33/0x100 [mlx5_core] [ 101.068747] mlx5e_xfrm_add_state+0x47b/0xaa0 [mlx5_core] [ 101.069312] bond_change_active_slave+0x392/0x900 [bonding] [ 101.069868] bond_option_active_slave_set+0x1c2/0x240 [bonding] [ 101.070454] __bond_opt_set+0xa6/0x430 [bonding] [ 101.070935] __bond_opt_set_notify+0x2f/0x90 [bonding] [ 101.071453] bond_opt_tryset_rtnl+0x72/0xb0 [bonding] [ 101.071965] bonding_sysfs_store_option+0x4d/0x80 [bonding] [ 101.072567] kernfs_fop_write_iter+0x10c/0x1a0 [ 101.073033] vfs_write+0x2d8/0x400 [ 101.073416] ? alloc_fd+0x48/0x180 [ 101.073798] ksys_write+0x5f/0xe0 [ 101.074175] do_syscall_64+0x52/0x110 [ 101.074576] entry_SYSCALL_64_after_hwframe+0x4b/0x53 As bond_ipsec_add_sa_all and bond_ipsec_del_sa_all are only called from bond_change_active_slave, which requires holding the RTNL lock. And bond_ipsec_add_sa and bond_ipsec_del_sa are xfrm state xdo_dev_state_add and xdo_dev_state_delete APIs, which are in user context. So ipsec_lock doesn't have to be spin lock, change it to mutex, and thus the above issue can be resolved. Fixes: 9a5605505d9c ("bonding: Add struct bond_ipesc to manage SA") Signed-off-by: Jianbo Liu Signed-off-by: Tariq Toukan Reviewed-by: Hangbin Liu Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20240823031056.110999-4-jianbol@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/bonding.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index b61fb1aa3a56..8bb5f016969f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -260,7 +260,7 @@ struct bonding { #ifdef CONFIG_XFRM_OFFLOAD struct list_head ipsec_list; /* protecting ipsec_list */ - spinlock_t ipsec_lock; + struct mutex ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ struct bpf_prog *xdp_prog; }; -- cgit v1.2.3 From 3410d0e14f9a6856386c6a8eb2310cbc58191777 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Mon, 26 Aug 2024 09:07:41 -0700 Subject: net: mana: Implement get_ringparam/set_ringparam for mana Currently the values of WQs for RX and TX queues for MANA devices are hardcoded to default sizes. Allow configuring these values for MANA devices as ringparam configuration(get/set) through ethtool_ops. Pre-allocate buffers at the beginning of this operation, to prevent complete network loss in low-memory conditions. 
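A rough sketch of the ethtool_ops side this enables; the callback name is hypothetical and the body is simplified (it assumes the port context is the netdev private data), while the limit macros and the rx_queue_size/tx_queue_size fields are the ones added in the hunk below. Userspace can then query and resize the rings with the usual ethtool -g / ethtool -G commands.

static void example_get_ringparam(struct net_device *ndev,
				  struct ethtool_ringparam *ring,
				  struct kernel_ethtool_ringparam *kring,
				  struct netlink_ext_ack *extack)
{
	struct mana_port_context *apc = netdev_priv(ndev);

	ring->rx_max_pending = MAX_RX_BUFFERS_PER_QUEUE;
	ring->tx_max_pending = MAX_TX_BUFFERS_PER_QUEUE;
	ring->rx_pending = apc->rx_queue_size;
	ring->tx_pending = apc->tx_queue_size;
}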
Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Saurabh Sengar Link: https://patch.msgid.link/1724688461-12203-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Jakub Kicinski --- include/net/mana/mana.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 7caa334f4888..1f869624811d 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -38,9 +38,21 @@ enum TRI_STATE { #define COMP_ENTRY_SIZE 64 -#define RX_BUFFERS_PER_QUEUE 512 +/* This Max value for RX buffers is derived from __alloc_page()'s max page + * allocation calculation. It allows maximum 2^(MAX_ORDER -1) pages. RX buffer + * size beyond this value gets rejected by __alloc_page() call. + */ +#define MAX_RX_BUFFERS_PER_QUEUE 8192 +#define DEF_RX_BUFFERS_PER_QUEUE 512 +#define MIN_RX_BUFFERS_PER_QUEUE 128 -#define MAX_SEND_BUFFERS_PER_QUEUE 256 +/* This max value for TX buffers is derived as the maximum allocatable + * pages supported on host per guest through testing. TX buffer size beyond + * this value is rejected by the hardware. + */ +#define MAX_TX_BUFFERS_PER_QUEUE 16384 +#define DEF_TX_BUFFERS_PER_QUEUE 256 +#define MIN_TX_BUFFERS_PER_QUEUE 128 #define EQ_SIZE (8 * MANA_PAGE_SIZE) @@ -286,7 +298,7 @@ struct mana_recv_buf_oob { void *buf_va; bool from_pool; /* allocated from a page pool */ - /* SGL of the buffer going to be sent has part of the work request. */ + /* SGL of the buffer going to be sent as part of the work request. */ u32 num_sge; struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES]; @@ -438,6 +450,9 @@ struct mana_port_context { unsigned int max_queues; unsigned int num_queues; + unsigned int rx_queue_size; + unsigned int tx_queue_size; + mana_handle_t port_handle; mana_handle_t pf_filter_handle; @@ -473,6 +488,8 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); void mana_query_gf_stats(struct mana_port_context *apc); +int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu); +void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); extern const struct ethtool_ops mana_ethtool_ops; -- cgit v1.2.3 From a96c5515d0d15df103598b2bc57245d66143b5dd Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Mon, 26 Aug 2024 21:43:08 +0000 Subject: net: dsa: microchip: Add KSZ8895/KSZ8864 switch support KSZ8895/KSZ8864 is a switch family between KSZ8863/73 and KSZ8795, so it shares some registers and functions in those switches already implemented in the KSZ DSA driver. 
Signed-off-by: Tristram Ha Tested-by: Pieter Van Trappen Reviewed-by: Oleksij Rempel Signed-off-by: Jakub Kicinski --- include/linux/platform_data/microchip-ksz.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index 8c659db4da6b..d074019474f5 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -28,6 +28,8 @@ enum ksz_chip_id { KSZ8794_CHIP_ID = 0x8794, KSZ8765_CHIP_ID = 0x8765, KSZ8830_CHIP_ID = 0x8830, + KSZ8864_CHIP_ID = 0x8864, + KSZ8895_CHIP_ID = 0x8895, KSZ9477_CHIP_ID = 0x00947700, KSZ9896_CHIP_ID = 0x00989600, KSZ9897_CHIP_ID = 0x00989700, -- cgit v1.2.3 From 17163f23678c7599e40758d7b96f68e3f3f2ea15 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Aug 2024 15:38:23 +0200 Subject: xfrm: minor update to sdb and xfrm_policy comments The spd is no longer maintained as a linear list. We also haven't been caching bundles in the xfrm_policy struct since 2010. While at it, add kdoc style comments for the xfrm_policy structure and extend the description of the current rbtree based search to mention why it needs to search the candidate set. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1fa2da22a49e..b6bfdc6416c7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -67,13 +67,15 @@ - instance of a transformer, struct xfrm_state (=SA) - template to clone xfrm_state, struct xfrm_tmpl - SPD is plain linear list of xfrm_policy rules, ordered by priority. + SPD is organized as hash table (for policies that meet minimum address prefix + length setting, net->xfrm.policy_hthresh). Other policies are stored in + lists, sorted into rbtree ordered by destination and source address networks. + See net/xfrm/xfrm_policy.c for details. + (To be compatible with existing pfkeyv2 implementations, many rules with priority of 0x7fffffff are allowed to exist and such rules are ordered in an unpredictable way, thanks to bsd folks.) - Lookup is plain linear search until the first match with selector. - If "action" is "block", then we prohibit the flow, otherwise: if "xfrms_nr" is zero, the flow passes untransformed. Otherwise, policy entry has list of up to XFRM_MAX_DEPTH transformations, @@ -86,8 +88,6 @@ |---. child .-> dst -. xfrm .-> xfrm_state #3 |---. child .-> NULL - Bundles are cached at xrfm_policy struct (field ->bundles). 
- Resolution of xrfm_tmpl ----------------------- @@ -526,6 +526,36 @@ struct xfrm_policy_queue { unsigned long timeout; }; +/** + * struct xfrm_policy - xfrm policy + * @xp_net: network namespace the policy lives in + * @bydst: hlist node for SPD hash table or rbtree list + * @byidx: hlist node for index hash table + * @lock: serialize changes to policy structure members + * @refcnt: reference count, freed once it reaches 0 + * @pos: kernel internal tie-breaker to determine age of policy + * @timer: timer + * @genid: generation, used to invalidate old policies + * @priority: priority, set by userspace + * @index: policy index (autogenerated) + * @if_id: virtual xfrm interface id + * @mark: packet mark + * @selector: selector + * @lft: liftime configuration data + * @curlft: liftime state + * @walk: list head on pernet policy list + * @polq: queue to hold packets while aqcuire operaion in progress + * @bydst_reinsert: policy tree node needs to be merged + * @type: XFRM_POLICY_TYPE_MAIN or _SUB + * @action: XFRM_POLICY_ALLOW or _BLOCK + * @flags: XFRM_POLICY_LOCALOK, XFRM_POLICY_ICMP + * @xfrm_nr: number of used templates in @xfrm_vec + * @family: protocol family + * @security: SELinux security label + * @xfrm_vec: array of templates to resolve state + * @rcu: rcu head, used to defer memory release + * @xdo: hardware offload state + */ struct xfrm_policy { possible_net_t xp_net; struct hlist_node bydst; -- cgit v1.2.3 From a547a5880cba6f287179135381f1b484b251be31 Mon Sep 17 00:00:00 2001 From: Peter Newman Date: Thu, 22 Aug 2024 12:02:11 -0700 Subject: x86/resctrl: Fix arch_mbm_* array overrun on SNC When using resctrl on systems with Sub-NUMA Clustering enabled, monitoring groups may be allocated RMID values which would overrun the arch_mbm_{local,total} arrays. This is due to inconsistencies in whether the SNC-adjusted num_rmid value or the unadjusted value in resctrl_arch_system_num_rmid_idx() is used. The num_rmid value for the L3 resource is currently: resctrl_arch_system_num_rmid_idx() / snc_nodes_per_l3_cache As a simple fix, make resctrl_arch_system_num_rmid_idx() return the SNC-adjusted, L3 num_rmid value on x86. Fixes: e13db55b5a0d ("x86/resctrl: Introduce snc_nodes_per_l3_cache") Signed-off-by: Peter Newman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20240822190212.1848788-1-peternewman@google.com --- include/linux/resctrl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b0875b99e811..d94abba1c716 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -248,6 +248,7 @@ struct resctrl_schema { /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); +u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); /* -- cgit v1.2.3 From 1934b212615dc617ac84fc306333ab2b9fc3b04f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 9 Aug 2024 18:00:01 +0200 Subject: file: reclaim 24 bytes from f_owner We do embedd struct fown_struct into struct file letting it take up 32 bytes in total. We could tweak struct fown_struct to be more compact but really it shouldn't even be embedded in struct file in the first place. Instead, actual users of struct fown_struct should allocate the struct on demand. This frees up 24 bytes in struct file. That will have some potentially user-visible changes for the ownership fcntl()s. 
Some of them can now fail due to allocation failures. Practically, that probably will almost never happen as the allocations are small and they only happen once per file. The fown_struct is used during kill_fasync() which is used by e.g., pipes to generate a SIGIO signal. Sending of such signals is conditional on userspace having set an owner for the file using one of the F_OWNER fcntl()s. Such users will be unaffected if struct fown_struct is allocated during the fcntl() call. There are a few subsystems that call __f_setown() expecting file->f_owner to be allocated: (1) tun devices file->f_op->fasync::tun_chr_fasync() -> __f_setown() There are no callers of tun_chr_fasync(). (2) tty devices file->f_op->fasync::tty_fasync() -> __tty_fasync() -> __f_setown() tty_fasync() has no additional callers but __tty_fasync() has. Note that __tty_fasync() only calls __f_setown() if the @on argument is true. It's called from: file->f_op->release::tty_release() -> tty_release() -> __tty_fasync() -> __f_setown() tty_release() calls __tty_fasync() with @on false => __f_setown() is never called from tty_release(). => All callers of tty_release() are safe as well. file->f_op->release::tty_open() -> tty_release() -> __tty_fasync() -> __f_setown() __tty_hangup() calls __tty_fasync() with @on false => __f_setown() is never called from tty_release(). => All callers of __tty_hangup() are safe as well. From the callchains it's obvious that (1) and (2) end up getting called via file->f_op->fasync(). That can happen either through the F_SETFL fcntl() with the FASYNC flag raised or via the FIOASYNC ioctl(). If FASYNC is requested and the file isn't already FASYNC then file->f_op->fasync() is called with @on true which ends up causing both (1) and (2) to call __f_setown(). (1) and (2) are the only subsystems that call __f_setown() from the file->f_op->fasync() handler. So both (1) and (2) have been updated to allocate a struct fown_struct prior to calling fasync_helper() to register with the fasync infrastructure. That's safe as they both call fasync_helper() which also does allocations if @on is true. The other interesting case are file leases: (3) file leases lease_manager_ops->lm_setup::lease_setup() -> __f_setown() Which in turn is called from: generic_add_lease() -> lease_manager_ops->lm_setup::lease_setup() -> __f_setown() So here again we can simply make generic_add_lease() allocate struct fown_struct prior to the lease_manager_ops->lm_setup::lease_setup() which happens under a spinlock. With that the two remaining subsystems that call __f_setown() are: (4) dnotify (5) sockets Both have their own custom ioctls to set struct fown_struct and both have been converted to allocate a struct fown_struct on demand from their respective ioctls. Interactions with O_PATH are fine as well e.g., when opening a /dev/tty as O_PATH then no file->f_op->open() happens thus no file->f_owner is allocated. That's fine as no file operation will be set for those and the device has never been opened. fcntl()s called on such things will just allocate a ->f_owner on demand. Although I have zero idea why'd you care about f_owner on an O_PATH fd. 
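The resulting pattern for such subsystems looks roughly like the sketch below; the fasync handler and list are hypothetical, while file_f_owner_allocate() and fasync_helper() are the real calls:

static struct fasync_struct *example_fasync_list;

static int example_fasync(int fd, struct file *filp, int on)
{
	int ret;

	if (on) {
		/* Make sure filp->f_owner exists before signal-driven I/O
		 * can be requested; this may now fail with -ENOMEM.
		 */
		ret = file_f_owner_allocate(filp);
		if (ret)
			return ret;
	}

	return fasync_helper(fd, filp, on, &example_fasync_list);
}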
Link: https://lore.kernel.org/r/20240813-work-f_owner-v2-1-4e9343a79f9f@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fb0426f349fc..7af239ca87e2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -947,6 +947,7 @@ static inline unsigned imajor(const struct inode *inode) } struct fown_struct { + struct file *file; /* backpointer for security modules */ rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ @@ -1011,7 +1012,7 @@ struct file { struct mutex f_pos_lock; loff_t f_pos; unsigned int f_flags; - struct fown_struct f_owner; + struct fown_struct *f_owner; const struct cred *f_cred; struct file_ra_state f_ra; struct path f_path; @@ -1076,6 +1077,12 @@ struct file_lease; #define OFFT_OFFSET_MAX type_max(off_t) #endif +int file_f_owner_allocate(struct file *file); +static inline struct fown_struct *file_f_owner(const struct file *file) +{ + return READ_ONCE(file->f_owner); +} + extern void send_sigio(struct fown_struct *fown, int fd, int band); static inline struct inode *file_inode(const struct file *f) @@ -1124,7 +1131,7 @@ extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); -extern int send_sigurg(struct fown_struct *fown); +extern int send_sigurg(struct file *file); /* * sb->s_flags. Note that these mirror the equivalent MS_* flags where -- cgit v1.2.3 From a55d1cbd1720679cfe9837bce250e397ec513989 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 22 Aug 2024 16:14:46 +0200 Subject: fs: switch f_iocb_flags and f_ra Now that we shrank struct file by 24 bytes we still have a 4 byte hole. If we move struct file_ra_state into the union and f_iocb_flags out of the union we close that whole and bring down struct file to 192 bytes. Which means struct file is 3 cachelines and we managed to shrink it by 40 bytes this cycle. I've tried to audit all codepaths that use f_ra and none of them seem to rely on it in file->f_op->release() and never have since commit 1da177e4c3f4 ("Linux-2.6.12-rc2"). Link: https://lore.kernel.org/r/20240823-luftdicht-berappen-d69a2166a0db@brauner Reviewed-by: Jeff Layton Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7af239ca87e2..095a956aeb29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -999,9 +999,9 @@ struct file { struct callback_head f_task_work; /* fput() must use workqueue (most kernel threads). */ struct llist_node f_llist; - unsigned int f_iocb_flags; + /* Invalid after last fput(). */ + struct file_ra_state f_ra; }; - /* * Protects f_ep, f_flags. * Must not be taken from IRQ context. 
@@ -1012,9 +1012,9 @@ struct file { struct mutex f_pos_lock; loff_t f_pos; unsigned int f_flags; + unsigned int f_iocb_flags; struct fown_struct *f_owner; const struct cred *f_cred; - struct file_ra_state f_ra; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; -- cgit v1.2.3 From 65dc80a78c5f42f5648396b218f50374cf1c2770 Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Tue, 27 Aug 2024 20:32:06 +0800 Subject: ASoC: SOF: Intel: hda: support BT link mask in mach_params Add an new variable bt_link_mask to snd_soc_acpi_mach_params structure. SSP port mask of BT offload found in NHLT table will be sent to machine driver to setup BE dai link with correct SSP port number. This patch only detects and enables the BT dailink. The functionality will only be unlocked with a topology file that makes a reference to that BT dailink. For backwards-compatibility reasons, this topology will not be used by default. Chromebooks and Linux users willing to experiment shall use the tplg_name kernel parameter to force the use of an enhanced topology. Signed-off-by: Brent Lu Reviewed-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://patch.msgid.link/20240827123215.258859-9-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index b6d301946244..c1552bc6e31c 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -73,6 +73,7 @@ static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) * @subsystem_rev: optional PCI SSID revision value * @subsystem_id_set: true if a value has been written to * subsystem_vendor and subsystem_device. + * @bt_link_mask: BT offload link enabled on the board */ struct snd_soc_acpi_mach_params { u32 acpi_ipc_irq_index; @@ -89,6 +90,7 @@ struct snd_soc_acpi_mach_params { unsigned short subsystem_device; unsigned short subsystem_rev; bool subsystem_id_set; + u32 bt_link_mask; }; /** -- cgit v1.2.3 From 290f31e943a29c93532b1684652b04fd60d0f696 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 26 Aug 2024 23:41:57 +0000 Subject: ASoC: soc-pcm: makes snd_soc_dpcm_can_be_xxx() local function No driver is calling snd_soc_dpcm_can_be_xxx() functions. We don't need to have EXPORT_SYMBOL_GPL() for them. Let's makes it static function. One note is that snd_soc_dpcm_fe_can_update() is not used in upstream. Use #if-endif and keep it for future support. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87h6b6df7e.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dpcm.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index 773f2db8c31c..c6fb350b4b06 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -113,24 +113,6 @@ struct snd_soc_dpcm_runtime { #define for_each_dpcm_be_rollback(fe, stream, _dpcm) \ list_for_each_entry_continue_reverse(_dpcm, &(fe)->dpcm[stream].be_clients, list_be) -/* can this BE stop and free */ -int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe, - struct snd_soc_pcm_runtime *be, int stream); - -/* can this BE perform a hw_params() */ -int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe, - struct snd_soc_pcm_runtime *be, int stream); - -/* can this BE perform prepare */ -int snd_soc_dpcm_can_be_prepared(struct snd_soc_pcm_runtime *fe, - struct snd_soc_pcm_runtime *be, int stream); - -/* is the current PCM operation for this FE ? */ -int snd_soc_dpcm_fe_can_update(struct snd_soc_pcm_runtime *fe, int stream); - -/* is the current PCM operation for this BE ? */ -int snd_soc_dpcm_be_can_update(struct snd_soc_pcm_runtime *fe, - struct snd_soc_pcm_runtime *be, int stream); /* get the substream for this BE */ struct snd_pcm_substream * -- cgit v1.2.3 From 1da29f2c39b67b846b74205c81bf0ccd96d34727 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Aug 2024 23:06:49 +0100 Subject: netfs, cifs: Fix handling of short DIO read Short DIO reads, particularly in relation to cifs, are not being handled correctly by cifs and netfslib. This can be tested by doing a DIO read of a file where the size of read is larger than the size of the file. When it crosses the EOF, it gets a short read and this gets retried, and in the case of cifs, the retry read fails, with the failure being translated to ENODATA. Fix this by the following means: (1) Add a flag, NETFS_SREQ_HIT_EOF, for the filesystem to set when it detects that the read did hit the EOF. (2) Make the netfslib read assessment stop processing subrequests when it encounters one with that flag set. (3) Return rreq->transferred, the accumulated contiguous amount read to that point, to userspace for a DIO read. (4) Make cifs set the flag and clear the error if the read RPC returned ENODATA. (5) Make cifs set the flag and clear the error if a short read occurred without error and the read-to file position is now at the remote inode size. 
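Condensed, item (4) amounts to something like the following in a filesystem's read completion path; the handler shape is illustrative, while NETFS_SREQ_HIT_EOF and netfs_subreq_terminated() are real:

static void example_read_done(struct netfs_io_subrequest *subreq, ssize_t rc)
{
	if (rc == -ENODATA) {
		/* Nothing exists past the EOF: stop retrying and let
		 * netfslib return what has been read so far.
		 */
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
		rc = 0;
	}
	netfs_subreq_terminated(subreq, rc, false);
}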
Fixes: 69c3c023af25 ("cifs: Implement netfslib hooks") Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- include/linux/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 983816608f15..c47443e7a97e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -198,6 +198,7 @@ struct netfs_io_subrequest { #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ +#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */ }; enum netfs_io_origin { -- cgit v1.2.3 From 789e51597d33ec0053b029127d797d86c0d857eb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 28 Aug 2024 10:36:35 +0200 Subject: Revert "drm/ttm: Add a flag to allow drivers to skip clear-on-free" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove TTM_TT_FLAG_CLEARED_ON_FREE now that XE stopped using this flag. This reverts commit decbfaf06db05fa1f9b33149ebb3c145b44e878f. Cc: Christian König Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Signed-off-by: Nirmoy Das Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240828083635.23601-2-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- include/drm/ttm/ttm_tt.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index cfaf49de2419..2b9d856ff388 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,9 +85,6 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * - * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles - * clearing backing store - * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -97,9 +94,8 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) -#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; -- cgit v1.2.3 From 947697c6f0f75f9866f2f891a102dece7a09a064 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 22 Aug 2024 10:18:52 +0530 Subject: uapi: Define GENMASK_U128 This adds GENMASK_U128() and __GENMASK_U128() macros using __BITS_PER_U128 and __int128 data types. These macros will be used in providing support for generating 128 bit masks. The macros wouldn't work in all assembler flavors for reasons described in the comments on top of declarations. Enforce it for more by adding !__ASSEMBLY__ guard. 
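A few illustrative values, which follow directly from the definitions added below (usable from C only, per the !__ASSEMBLY__ guard):

unsigned __int128 all  = GENMASK_U128(127, 0);	/* all 128 bits set */
unsigned __int128 high = GENMASK_U128(127, 64);	/* upper 64 bits set */
unsigned __int128 low  = GENMASK_U128(63, 0);	/* same bits as GENMASK_ULL(63, 0) */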
Cc: Yury Norov Cc: Rasmus Villemoes Cc: Arnd Bergmann > Cc: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org Reviewed-by: Arnd Bergmann Signed-off-by: Anshuman Khandual Signed-off-by: Yury Norov --- include/linux/bits.h | 15 +++++++++++++++ include/uapi/linux/bits.h | 3 +++ include/uapi/linux/const.h | 17 +++++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/bits.h b/include/linux/bits.h index 0eb24d21aac2..60044b608817 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -36,4 +36,19 @@ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __GENMASK_U128() depends on _BIT128() which would not work + * in the asm code, as it shifts an 'unsigned __init128' data + * type instead of direct representation of 128 bit constants + * such as long and unsigned long. The fundamental problem is + * that a 128 bit constant will get silently truncated by the + * gcc compiler. + */ +#define GENMASK_U128(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) +#endif + #endif /* __LINUX_BITS_H */ diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 3c2a101986a3..5ee30f882736 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -12,4 +12,7 @@ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_U128(h, l) \ + ((_BIT128((h)) << 1) - (_BIT128(l))) + #endif /* _UAPI_LINUX_BITS_H */ diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index a429381e7ca5..e16be0d37746 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,6 +28,23 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __BIT128() would not work in the asm code, as it shifts an + * 'unsigned __init128' data type as direct representation of + * 128 bit constants is not supported in the gcc compiler, as + * they get silently truncated. + * + * TODO: Please revisit this implementation when gcc compiler + * starts representing 128 bit constants directly like long + * and unsigned long etc. Subsequently drop the comment for + * GENMASK_U128() which would then start supporting asm code. + */ +#define _BIT128(x) ((unsigned __int128)(1) << (x)) +#endif + #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) -- cgit v1.2.3 From f951171044762a0d179caf9f1addcdbc7cb533bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Aug 2024 08:50:46 +0200 Subject: ext4: remove tracing for FALLOC_FL_NO_HIDE_STALE FALLOC_FL_NO_HIDE_STALE can't make it past vfs_fallocate (and if the flag does what the name implies that's a good thing as it would be highly dangerous). Remove the dead tracing code for it. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240827065123.1762168-3-hch@lst.de Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/trace/events/ext4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index cc5e9b7b2b44..156908641e68 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -91,7 +91,6 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); #define show_falloc_mode(mode) __print_flags(mode, "|", \ { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ - { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) -- cgit v1.2.3 From 57413d8e172c10c90fbd91f98d0f7d8eb27e824c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Aug 2024 08:50:47 +0200 Subject: fs: sort out the fallocate mode vs flag mess The fallocate system call takes a mode argument, but that argument contains a wild mix of exclusive modes and an optional flags. Replace FALLOC_FL_SUPPORTED_MASK with FALLOC_FL_MODE_MASK, which excludes the optional flag bit, so that we can use switch statement on the value to easily enumerate the cases while getting the check for duplicate modes for free. To make this (and in the future the file system implementations) more readable also add a symbolic name for the 0 mode used to allocate blocks. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240827065123.1762168-4-hch@lst.de Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/falloc.h | 18 ++++++++++++------ include/uapi/linux/falloc.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/falloc.h b/include/linux/falloc.h index f3f0b97b1675..3f49f3df6af5 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -25,12 +25,18 @@ struct space_resv { #define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv) #define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv) -#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \ - FALLOC_FL_PUNCH_HOLE | \ - FALLOC_FL_COLLAPSE_RANGE | \ - FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE | \ - FALLOC_FL_UNSHARE_RANGE) +/* + * Mask of all supported fallocate modes. Only one can be set at a time. + * + * In addition to the mode bit, the mode argument can also encode flags. + * FALLOC_FL_KEEP_SIZE is the only supported flag so far. 
+ */ +#define FALLOC_FL_MODE_MASK (FALLOC_FL_ALLOCATE_RANGE | \ + FALLOC_FL_PUNCH_HOLE | \ + FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | \ + FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) /* on ia32 l_start is on a 32-bit boundary */ #if defined(CONFIG_X86_64) diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h index 51398fa57f6c..5810371ed72b 100644 --- a/include/uapi/linux/falloc.h +++ b/include/uapi/linux/falloc.h @@ -2,6 +2,7 @@ #ifndef _UAPI_FALLOC_H_ #define _UAPI_FALLOC_H_ +#define FALLOC_FL_ALLOCATE_RANGE 0x00 /* allocate range */ #define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */ #define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */ #define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */ -- cgit v1.2.3 From 654beb75ca95240c00c78be0bbc2ea66a125a997 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 21 Aug 2024 18:46:33 +0200 Subject: dma: ipu: Remove include/linux/dma/ipu-dma.h When this file was renamed in commit b8a6d9980f75 ("dma: ipu: rename mach/ipu.h to include/linux/dma/ipu-dma.h"), 4 .c files have been modified accordingly: drivers/dma/ipu/ipu_idmac.c drivers/dma/ipu/ipu_irq.c --> removed in commit f1de55ff7c70 ("dmaengine: ipu: Remove the driver") in 2023-08 drivers/media/platform/soc_camera/mx3_camera.c --> removed in commit c93cc61475eb ("[media] staging/media: remove deprecated mx3 driver") in 2016-06 drivers/video/mx3fb.c --> removed in commit bfac19e239a7 ("fbdev: mx3fb: Remove the driver") in 2023-08 Now include/linux/dma/ipu-dma.h is unused and can be removed as-well. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/532e7e2816ccf226f3ab1fa76ec7873bc09299d0.1724258714.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- include/linux/dma/ipu-dma.h | 174 -------------------------------------------- 1 file changed, 174 deletions(-) delete mode 100644 include/linux/dma/ipu-dma.h (limited to 'include') diff --git a/include/linux/dma/ipu-dma.h b/include/linux/dma/ipu-dma.h deleted file mode 100644 index 6969391580d2..000000000000 --- a/include/linux/dma/ipu-dma.h +++ /dev/null @@ -1,174 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2008 - * Guennadi Liakhovetski, DENX Software Engineering, - * - * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. - */ - -#ifndef __LINUX_DMA_IPU_DMA_H -#define __LINUX_DMA_IPU_DMA_H - -#include -#include - -/* IPU DMA Controller channel definitions. */ -enum ipu_channel { - IDMAC_IC_0 = 0, /* IC (encoding task) to memory */ - IDMAC_IC_1 = 1, /* IC (viewfinder task) to memory */ - IDMAC_ADC_0 = 1, - IDMAC_IC_2 = 2, - IDMAC_ADC_1 = 2, - IDMAC_IC_3 = 3, - IDMAC_IC_4 = 4, - IDMAC_IC_5 = 5, - IDMAC_IC_6 = 6, - IDMAC_IC_7 = 7, /* IC (sensor data) to memory */ - IDMAC_IC_8 = 8, - IDMAC_IC_9 = 9, - IDMAC_IC_10 = 10, - IDMAC_IC_11 = 11, - IDMAC_IC_12 = 12, - IDMAC_IC_13 = 13, - IDMAC_SDC_0 = 14, /* Background synchronous display data */ - IDMAC_SDC_1 = 15, /* Foreground data (overlay) */ - IDMAC_SDC_2 = 16, - IDMAC_SDC_3 = 17, - IDMAC_ADC_2 = 18, - IDMAC_ADC_3 = 19, - IDMAC_ADC_4 = 20, - IDMAC_ADC_5 = 21, - IDMAC_ADC_6 = 22, - IDMAC_ADC_7 = 23, - IDMAC_PF_0 = 24, - IDMAC_PF_1 = 25, - IDMAC_PF_2 = 26, - IDMAC_PF_3 = 27, - IDMAC_PF_4 = 28, - IDMAC_PF_5 = 29, - IDMAC_PF_6 = 30, - IDMAC_PF_7 = 31, -}; - -/* Order significant! 
*/ -enum ipu_channel_status { - IPU_CHANNEL_FREE, - IPU_CHANNEL_INITIALIZED, - IPU_CHANNEL_READY, - IPU_CHANNEL_ENABLED, -}; - -#define IPU_CHANNELS_NUM 32 - -enum pixel_fmt { - /* 1 byte */ - IPU_PIX_FMT_GENERIC, - IPU_PIX_FMT_RGB332, - IPU_PIX_FMT_YUV420P, - IPU_PIX_FMT_YUV422P, - IPU_PIX_FMT_YUV420P2, - IPU_PIX_FMT_YVU422P, - /* 2 bytes */ - IPU_PIX_FMT_RGB565, - IPU_PIX_FMT_RGB666, - IPU_PIX_FMT_BGR666, - IPU_PIX_FMT_YUYV, - IPU_PIX_FMT_UYVY, - /* 3 bytes */ - IPU_PIX_FMT_RGB24, - IPU_PIX_FMT_BGR24, - /* 4 bytes */ - IPU_PIX_FMT_GENERIC_32, - IPU_PIX_FMT_RGB32, - IPU_PIX_FMT_BGR32, - IPU_PIX_FMT_ABGR32, - IPU_PIX_FMT_BGRA32, - IPU_PIX_FMT_RGBA32, -}; - -enum ipu_color_space { - IPU_COLORSPACE_RGB, - IPU_COLORSPACE_YCBCR, - IPU_COLORSPACE_YUV -}; - -/* - * Enumeration of IPU rotation modes - */ -enum ipu_rotate_mode { - /* Note the enum values correspond to BAM value */ - IPU_ROTATE_NONE = 0, - IPU_ROTATE_VERT_FLIP = 1, - IPU_ROTATE_HORIZ_FLIP = 2, - IPU_ROTATE_180 = 3, - IPU_ROTATE_90_RIGHT = 4, - IPU_ROTATE_90_RIGHT_VFLIP = 5, - IPU_ROTATE_90_RIGHT_HFLIP = 6, - IPU_ROTATE_90_LEFT = 7, -}; - -/* - * Enumeration of DI ports for ADC. - */ -enum display_port { - DISP0, - DISP1, - DISP2, - DISP3 -}; - -struct idmac_video_param { - unsigned short in_width; - unsigned short in_height; - uint32_t in_pixel_fmt; - unsigned short out_width; - unsigned short out_height; - uint32_t out_pixel_fmt; - unsigned short out_stride; - bool graphics_combine_en; - bool global_alpha_en; - bool key_color_en; - enum display_port disp; - unsigned short out_left; - unsigned short out_top; -}; - -/* - * Union of initialization parameters for a logical channel. So far only video - * parameters are used. - */ -union ipu_channel_param { - struct idmac_video_param video; -}; - -struct idmac_tx_desc { - struct dma_async_tx_descriptor txd; - struct scatterlist *sg; /* scatterlist for this */ - unsigned int sg_len; /* tx-descriptor. */ - struct list_head list; -}; - -struct idmac_channel { - struct dma_chan dma_chan; - dma_cookie_t completed; /* last completed cookie */ - union ipu_channel_param params; - enum ipu_channel link; /* input channel, linked to the output */ - enum ipu_channel_status status; - void *client; /* Only one client per channel */ - unsigned int n_tx_desc; - struct idmac_tx_desc *desc; /* allocated tx-descriptors */ - struct scatterlist *sg[2]; /* scatterlist elements in buffer-0 and -1 */ - struct list_head free_list; /* free tx-descriptors */ - struct list_head queue; /* queued tx-descriptors */ - spinlock_t lock; /* protects sg[0,1], queue */ - struct mutex chan_mutex; /* protects status, cookie, free_list */ - bool sec_chan_en; - int active_buffer; - unsigned int eof_irq; - char eof_name[16]; /* EOF IRQ name for request_irq() */ -}; - -#define to_tx_desc(tx) container_of(tx, struct idmac_tx_desc, txd) -#define to_idmac_chan(c) container_of(c, struct idmac_channel, dma_chan) - -#endif /* __LINUX_DMA_IPU_DMA_H */ -- cgit v1.2.3 From 73d5fc92a11cacb73a1aac0b5793c47e48c5b537 Mon Sep 17 00:00:00 2001 From: Nishad Saraf Date: Mon, 19 Aug 2024 14:19:48 -0700 Subject: dmaengine: amd: qdma: Add AMD QDMA driver Adds driver to enable PCIe board which uses AMD QDMA (the Queue-based Direct Memory Access) subsystem. For example, Xilinx Alveo V70 AI Accelerator devices. https://www.xilinx.com/applications/data-center/v70.html The QDMA subsystem is used in conjunction with the PCI Express IP block to provide high performance data transfer between host memory and the card's DMA subsystem. 
+-------+ +-------+ +-----------+ PCIe | | | | | | Tx/Rx | | | | AXI | | <=======> | PCIE | <===> | QDMA | <====>| User Logic| | | | | | | +-------+ +-------+ +-----------+ The primary mechanism to transfer data using the QDMA is for the QDMA engine to operate on instructions (descriptors) provided by the host operating system. Using the descriptors, the QDMA can move data in both the Host to Card (H2C) direction, or the Card to Host (C2H) direction. The QDMA provides a per-queue basis option whether DMA traffic goes to an AXI4 memory map (MM) interface or to an AXI4-Stream interface. The hardware detail is provided by https://docs.xilinx.com/r/en-US/pg302-qdma Implements dmaengine APIs to support MM DMA transfers. - probe the available DMA channels - use dma_slave_map for channel lookup - use virtual channel to manage dmaengine tx descriptors - implement device_prep_slave_sg callback to handle host scatter gather list Signed-off-by: Nishad Saraf Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20240819211948.688786-2-lizhi.hou@amd.com Signed-off-by: Vinod Koul --- include/linux/platform_data/amd_qdma.h | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/platform_data/amd_qdma.h (limited to 'include') diff --git a/include/linux/platform_data/amd_qdma.h b/include/linux/platform_data/amd_qdma.h new file mode 100644 index 000000000000..576d952f97ed --- /dev/null +++ b/include/linux/platform_data/amd_qdma.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _PLATDATA_AMD_QDMA_H +#define _PLATDATA_AMD_QDMA_H + +#include + +/** + * struct qdma_queue_info - DMA queue information. This information is used to + * match queue when DMA channel is requested + * @dir: Channel transfer direction + */ +struct qdma_queue_info { + enum dma_transfer_direction dir; +}; + +#define QDMA_FILTER_PARAM(qinfo) ((void *)(qinfo)) + +struct dma_slave_map; + +/** + * struct qdma_platdata - Platform specific data for QDMA engine + * @max_mm_channels: Maximum number of MM DMA channels in each direction + * @device_map: DMA slave map + * @irq_index: The index of first IRQ + */ +struct qdma_platdata { + u32 max_mm_channels; + u32 irq_index; + struct dma_slave_map *device_map; +}; + +#endif /* _PLATDATA_AMD_QDMA_H */ -- cgit v1.2.3 From 4bb59323450db610b06c549fa4d5936e94fb9a36 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Sat, 10 Aug 2024 17:45:40 +0800 Subject: dmaengine: ti: k3-udma: Remove unused declarations Commit d70241913413 ("dmaengine: ti: k3-udma: Add glue layer for non DMAengine users") declared but never implemented these. 
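The AMD QDMA patch above exposes its MM queues through the standard dmaengine slave API (dma_slave_map lookup plus a device_prep_slave_sg callback), so a client driver drives it like any other slave-DMA provider. Below is a minimal, hedged consumer sketch; the channel name "h2c-queue0" and the example function are assumptions for illustration and are not defined by the patch.

/* Hedged consumer sketch for a dmaengine slave channel such as the ones
 * the AMD QDMA driver registers; "h2c-queue0" is an assumed example name.
 */
#include <linux/device.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int example_qdma_sg_xfer(struct device *dev, struct scatterlist *sgl,
				unsigned int sg_len)
{
	struct dma_async_tx_descriptor *desc;
	struct dma_chan *chan;
	dma_cookie_t cookie;

	chan = dma_request_chan(dev, "h2c-queue0");	/* resolved via dma_slave_map */
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	desc = dmaengine_prep_slave_sg(chan, sgl, sg_len, DMA_MEM_TO_DEV,
				       DMA_PREP_INTERRUPT);
	if (!desc) {
		dma_release_channel(chan);
		return -EIO;
	}

	cookie = dmaengine_submit(desc);		/* queue the descriptor */
	if (dma_submit_error(cookie)) {
		dma_release_channel(chan);
		return -EIO;
	}
	dma_async_issue_pending(chan);			/* start the transfer */

	/* Completion handling (callback or dma_sync_wait()) and channel
	 * release are omitted from this sketch.
	 */
	return 0;
}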
Signed-off-by: Yue Haibing Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20240810094540.2589310-1-yuehaibing@huawei.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-udma-glue.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index 1e491c5dcac2..2dea217629d0 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -136,8 +136,6 @@ u32 k3_udma_glue_rx_flow_get_fdq_id(struct k3_udma_glue_rx_channel *rx_chn, u32 k3_udma_glue_rx_get_flow_id_base(struct k3_udma_glue_rx_channel *rx_chn); int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num); -void k3_udma_glue_rx_put_irq(struct k3_udma_glue_rx_channel *rx_chn, - u32 flow_num); void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num, void *data, void (*cleanup)(void *data, dma_addr_t desc_dma), -- cgit v1.2.3 From fb234516c5a0728d7dbd718667c33c1523b55fe8 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Mon, 26 Aug 2024 18:39:46 +0200 Subject: dt-bindings: clock: rockchip: remove CLK_NR_CLKS and CLKPMU_NR_CLKS CLK_NR_CLKS and CLKPMU_NR_CLKS should not be part of the binding. Remove since the kernel code no longer uses it. Signed-off-by: Johan Jonker Acked-by: Conor Dooley Link: https://lore.kernel.org/r/a3292ed0-3489-4887-8567-40ea4983c592@gmail.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/px30-cru.h | 4 ---- include/dt-bindings/clock/rk3036-cru.h | 2 -- include/dt-bindings/clock/rk3228-cru.h | 2 -- include/dt-bindings/clock/rk3288-cru.h | 2 -- include/dt-bindings/clock/rk3308-cru.h | 2 -- include/dt-bindings/clock/rk3328-cru.h | 2 -- include/dt-bindings/clock/rk3368-cru.h | 2 -- include/dt-bindings/clock/rk3399-cru.h | 4 ---- 8 files changed, 20 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/px30-cru.h b/include/dt-bindings/clock/px30-cru.h index 5b1416fcde6f..a2abf1995c34 100644 --- a/include/dt-bindings/clock/px30-cru.h +++ b/include/dt-bindings/clock/px30-cru.h @@ -175,8 +175,6 @@ #define PCLK_CIF 352 #define PCLK_OTP_PHY 353 -#define CLK_NR_CLKS (PCLK_OTP_PHY + 1) - /* pmu-clocks indices */ #define PLL_GPLL 1 @@ -195,8 +193,6 @@ #define PCLK_GPIO0_PMU 20 #define PCLK_UART0_PMU 21 -#define CLKPMU_NR_CLKS (PCLK_UART0_PMU + 1) - /* soft-reset indices */ #define SRST_CORE0_PO 0 #define SRST_CORE1_PO 1 diff --git a/include/dt-bindings/clock/rk3036-cru.h b/include/dt-bindings/clock/rk3036-cru.h index a96a9870ad59..99cc617e1e54 100644 --- a/include/dt-bindings/clock/rk3036-cru.h +++ b/include/dt-bindings/clock/rk3036-cru.h @@ -94,8 +94,6 @@ #define HCLK_CPU 477 #define HCLK_PERI 478 -#define CLK_NR_CLKS (HCLK_PERI + 1) - /* soft-reset indices */ #define SRST_CORE0 0 #define SRST_CORE1 1 diff --git a/include/dt-bindings/clock/rk3228-cru.h b/include/dt-bindings/clock/rk3228-cru.h index de550ea56eeb..138b6ce514dd 100644 --- a/include/dt-bindings/clock/rk3228-cru.h +++ b/include/dt-bindings/clock/rk3228-cru.h @@ -146,8 +146,6 @@ #define HCLK_S_CRYPTO 477 #define HCLK_PERI 478 -#define CLK_NR_CLKS (HCLK_PERI + 1) - /* soft-reset indices */ #define SRST_CORE0_PO 0 #define SRST_CORE1_PO 1 diff --git a/include/dt-bindings/clock/rk3288-cru.h b/include/dt-bindings/clock/rk3288-cru.h index 33819acbfc56..c6034b01b050 100644 --- a/include/dt-bindings/clock/rk3288-cru.h +++ b/include/dt-bindings/clock/rk3288-cru.h @@ -195,8 +195,6 @@ #define HCLK_CPU 477 #define HCLK_PERI 478 -#define CLK_NR_CLKS (HCLK_PERI + 1) - /* 
soft-reset indices */ #define SRST_CORE0 0 #define SRST_CORE1 1 diff --git a/include/dt-bindings/clock/rk3308-cru.h b/include/dt-bindings/clock/rk3308-cru.h index d97840f9ee2e..ce4cd72b9d3d 100644 --- a/include/dt-bindings/clock/rk3308-cru.h +++ b/include/dt-bindings/clock/rk3308-cru.h @@ -212,8 +212,6 @@ #define PCLK_CAN 233 #define PCLK_OWIRE 234 -#define CLK_NR_CLKS (PCLK_OWIRE + 1) - /* soft-reset indices */ /* cru_softrst_con0 */ diff --git a/include/dt-bindings/clock/rk3328-cru.h b/include/dt-bindings/clock/rk3328-cru.h index 555b4ff660ae..8885a2e98c65 100644 --- a/include/dt-bindings/clock/rk3328-cru.h +++ b/include/dt-bindings/clock/rk3328-cru.h @@ -201,8 +201,6 @@ #define HCLK_RGA 340 #define HCLK_HDCP 341 -#define CLK_NR_CLKS (HCLK_HDCP + 1) - /* soft-reset indices */ #define SRST_CORE0_PO 0 #define SRST_CORE1_PO 1 diff --git a/include/dt-bindings/clock/rk3368-cru.h b/include/dt-bindings/clock/rk3368-cru.h index 83c72a163fd3..ebae3cbf8192 100644 --- a/include/dt-bindings/clock/rk3368-cru.h +++ b/include/dt-bindings/clock/rk3368-cru.h @@ -182,8 +182,6 @@ #define HCLK_BUS 477 #define HCLK_PERI 478 -#define CLK_NR_CLKS (HCLK_PERI + 1) - /* soft-reset indices */ #define SRST_CORE_B0 0 #define SRST_CORE_B1 1 diff --git a/include/dt-bindings/clock/rk3399-cru.h b/include/dt-bindings/clock/rk3399-cru.h index 39169d94a44e..4c90c7703a83 100644 --- a/include/dt-bindings/clock/rk3399-cru.h +++ b/include/dt-bindings/clock/rk3399-cru.h @@ -335,8 +335,6 @@ #define HCLK_SDIO_NOC 495 #define HCLK_SDIOAUDIO_NOC 496 -#define CLK_NR_CLKS (HCLK_SDIOAUDIO_NOC + 1) - /* pmu-clocks indices */ #define PLL_PPLL 1 @@ -378,8 +376,6 @@ #define PCLK_INTR_ARB_PMU 49 #define HCLK_NOC_PMU 50 -#define CLKPMU_NR_CLKS (HCLK_NOC_PMU + 1) - /* soft-reset indices */ /* cru_softrst_con0 */ -- cgit v1.2.3 From 41845541adebc503b8574943c92670016d5e566b Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 23 Aug 2024 17:05:18 +0800 Subject: firmware: arm_scmi: Add initial support for i.MX BBM protocol i.MX95 has a battery-backed module(BBM), which has persistent storage (GPR), an RTC, and the ON/OFF button. The System Manager(SM) firmware use SCMI vendor protocol(SCMI BBM) to let agent be able to use GPR, RTC and ON/OFF button. Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Message-Id: <20240823-imx95-bbm-misc-v2-v8-2-e600ed9e9271@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/scmi_imx_protocol.h | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/scmi_imx_protocol.h (limited to 'include') diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h new file mode 100644 index 000000000000..2df2ea0f1809 --- /dev/null +++ b/include/linux/scmi_imx_protocol.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * SCMI Message Protocol driver NXP extension header + * + * Copyright 2024 NXP. 
+ */ + +#ifndef _LINUX_SCMI_NXP_PROTOCOL_H +#define _LINUX_SCMI_NXP_PROTOCOL_H + +#include +#include +#include +#include + +enum scmi_nxp_protocol { + SCMI_PROTOCOL_IMX_BBM = 0x81, +}; + +struct scmi_imx_bbm_proto_ops { + int (*rtc_time_set)(const struct scmi_protocol_handle *ph, u32 id, + uint64_t sec); + int (*rtc_time_get)(const struct scmi_protocol_handle *ph, u32 id, + u64 *val); + int (*rtc_alarm_set)(const struct scmi_protocol_handle *ph, u32 id, + bool enable, u64 sec); + int (*button_get)(const struct scmi_protocol_handle *ph, u32 *state); +}; + +enum scmi_nxp_notification_events { + SCMI_EVENT_IMX_BBM_RTC = 0x0, + SCMI_EVENT_IMX_BBM_BUTTON = 0x1, +}; + +struct scmi_imx_bbm_notif_report { + bool is_rtc; + bool is_button; + ktime_t timestamp; + unsigned int rtc_id; + unsigned int rtc_evt; +}; +#endif -- cgit v1.2.3 From 61c9f03e22fc57fe61726c513b1f92c0ed1ef00f Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 23 Aug 2024 17:05:19 +0800 Subject: firmware: arm_scmi: Add initial support for i.MX MISC protocol The i.MX95 System Manager (SM) firmware includes an SCMI vendor protocol, the SCMI MISC protocol, which provides controls for miscellaneous settings/actions that must be exposed from the SM to agents. They are device specific and are usually defined to access bit fields in various mix block control modules, IOMUX_GPR, and other General Purpose Registers and Control Status Registers owned by the SM. Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Message-Id: <20240823-imx95-bbm-misc-v2-v8-3-e600ed9e9271@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/scmi_imx_protocol.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h index 2df2ea0f1809..066216f1357a 100644 --- a/include/linux/scmi_imx_protocol.h +++ b/include/linux/scmi_imx_protocol.h @@ -15,6 +15,7 @@ enum scmi_nxp_protocol { SCMI_PROTOCOL_IMX_BBM = 0x81, + SCMI_PROTOCOL_IMX_MISC = 0x84, }; struct scmi_imx_bbm_proto_ops { @@ -30,6 +31,7 @@ struct scmi_imx_bbm_proto_ops { enum scmi_nxp_notification_events { SCMI_EVENT_IMX_BBM_RTC = 0x0, SCMI_EVENT_IMX_BBM_BUTTON = 0x1, + SCMI_EVENT_IMX_MISC_CONTROL = 0x0, }; struct scmi_imx_bbm_notif_report { @@ -39,4 +41,19 @@ struct scmi_imx_bbm_notif_report { unsigned int rtc_id; unsigned int rtc_evt; }; + +struct scmi_imx_misc_ctrl_notify_report { + ktime_t timestamp; + unsigned int ctrl_id; + unsigned int flags; +}; + +struct scmi_imx_misc_proto_ops { + int (*misc_ctrl_set)(const struct scmi_protocol_handle *ph, u32 id, + u32 num, u32 *val); + int (*misc_ctrl_get)(const struct scmi_protocol_handle *ph, u32 id, + u32 *num, u32 *val); + int (*misc_ctrl_req_notify)(const struct scmi_protocol_handle *ph, + u32 ctrl_id, u32 evt_id, u32 flags); +}; #endif -- cgit v1.2.3 From 0b4f8a68b292e7ee82107b1ce15c3aad31c864b1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 23 Aug 2024 17:05:21 +0800 Subject: firmware: imx: Add i.MX95 MISC driver The i.MX95 System Manager exports the SCMI MISC protocol so that Linux can apply various settings, such as configuring a board GPIO expander as a wakeup source. Add a driver to support this.
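For context, the sm.h header added in the diff below gives other in-kernel drivers direct accessors for these controls. A minimal, hedged caller sketch follows; the value written here is a made-up placeholder, not a documented i.MX95 setting.

/* Hedged caller sketch: the 0x1 value is a placeholder, not a documented
 * MQS1 layout; the control word count is assumed to be one.
 */
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/firmware/imx/sm.h>

static int example_mqs1_setup(void)
{
	u32 num = 1, val = 0;
	int ret;

	/* Read the current MQS1 control word through the System Manager. */
	ret = scmi_imx_misc_ctrl_get(SCMI_IMX_CTRL_MQS1_SETTINGS, &num, &val);
	if (ret)
		return ret;	/* -EOPNOTSUPP if CONFIG_IMX_SCMI_MISC_EXT is disabled */

	/* Write an example value back. */
	return scmi_imx_misc_ctrl_set(SCMI_IMX_CTRL_MQS1_SETTINGS, 0x1);
}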
Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Message-Id: <20240823-imx95-bbm-misc-v2-v8-5-e600ed9e9271@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/firmware/imx/sm.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/firmware/imx/sm.h (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h new file mode 100644 index 000000000000..62a2690e2abd --- /dev/null +++ b/include/linux/firmware/imx/sm.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2024 NXP + */ + +#ifndef _SCMI_IMX_H +#define _SCMI_IMX_H + +#include +#include +#include + +#define SCMI_IMX_CTRL_PDM_CLK_SEL 0 /* AON PDM clock sel */ +#define SCMI_IMX_CTRL_MQS1_SETTINGS 1 /* AON MQS settings */ +#define SCMI_IMX_CTRL_SAI1_MCLK 2 /* AON SAI1 MCLK */ +#define SCMI_IMX_CTRL_SAI3_MCLK 3 /* WAKE SAI3 MCLK */ +#define SCMI_IMX_CTRL_SAI4_MCLK 4 /* WAKE SAI4 MCLK */ +#define SCMI_IMX_CTRL_SAI5_MCLK 5 /* WAKE SAI5 MCLK */ + +#if IS_ENABLED(CONFIG_IMX_SCMI_MISC_EXT) +int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); +int scmi_imx_misc_ctrl_set(u32 id, u32 val); +#else +static inline int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_misc_ctrl_set(u32 id, u32 val) +{ + return -EOPNOTSUPP; +} +#endif +#endif -- cgit v1.2.3 From 3e3ac9c39e1b575e970ecab90504b6cb090f6b05 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Aug 2024 01:52:49 +0000 Subject: tcp: remove volatile qualifier on tw_substate Using a volatile qualifier for a specific struct field is unusual. Use instead READ_ONCE()/WRITE_ONCE() where necessary. tcp_timewait_state_process() can change tw_substate while other threads are reading this field. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Link: https://patch.msgid.link/20240827015250.3509197-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_timewait_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index f88b68269012..beb533a0e880 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -58,7 +58,7 @@ struct inet_timewait_sock { #define tw_dr __tw_common.skc_tw_dr __u32 tw_mark; - volatile unsigned char tw_substate; + unsigned char tw_substate; unsigned char tw_rcv_wscale; /* Socket demultiplex comparisons on incoming packets. */ -- cgit v1.2.3 From 0870b0d8b393dde53106678a1e2cec9dfa52f9b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Aug 2024 11:49:16 +0000 Subject: net: busy-poll: use ktime_get_ns() instead of local_clock() Typically, busy-polling durations are below 100 usec. When/if the busy-poller thread migrates to another cpu, local_clock() can be off by +/-2msec or more for small values of HZ, depending on the platform. Use ktime_get_ns() to ensure deterministic behavior, which is the whole point of busy-polling.
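For context, busy polling is normally opted into per socket from userspace, which is where the sub-100-usec budgets mentioned above come from. A small hedged example (the 50 usec value is arbitrary):

/* Userspace sketch: request ~50 usec of busy polling per blocking receive.
 * The value is an arbitrary example, not a recommendation.
 */
#include <stdio.h>
#include <sys/socket.h>

static int enable_busy_poll(int fd)
{
	unsigned int usec = 50;

	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, &usec, sizeof(usec)) < 0) {
		perror("setsockopt(SO_BUSY_POLL)");
		return -1;
	}
	return 0;
}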
Fixes: 060212928670 ("net: add low latency socket poll") Fixes: 9a3c71aa8024 ("net: convert low latency sockets to sched_clock()") Fixes: 37089834528b ("sched, net: Fixup busy_loop_us_clock()") Signed-off-by: Eric Dumazet Cc: Mina Almasry Cc: Willem de Bruijn Reviewed-by: Joe Damato Link: https://patch.msgid.link/20240827114916.223377-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/busy_poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 9b09acac538e..522f1da8b747 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -68,7 +68,7 @@ static inline bool sk_can_busy_loop(struct sock *sk) static inline unsigned long busy_loop_current_time(void) { #ifdef CONFIG_NET_RX_BUSY_POLL - return (unsigned long)(local_clock() >> 10); + return (unsigned long)(ktime_get_ns() >> 10); #else return 0; #endif -- cgit v1.2.3 From 3e3ac9c39e1b575e970ecab90504b6cb090f6b05 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:20:05 +0800 Subject: scsi: core: Remove obsoleted declaration for scsi_driverbyte_string() scsi_driverbyte_string() has been unused since commit 54c29086195f ("scsi: core: Drop the now obsolete driver_byte definitions"). Remove it. Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20240826032005.4007834-1-cuigaosheng1@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_dbg.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_dbg.h b/include/scsi/scsi_dbg.h index 7b196d234626..bd29cdb513a5 100644 --- a/include/scsi/scsi_dbg.h +++ b/include/scsi/scsi_dbg.h @@ -24,7 +24,6 @@ extern const char *scsi_extd_sense_format(unsigned char, unsigned char, const char **); extern const char *scsi_mlreturn_string(int); extern const char *scsi_hostbyte_string(int); -extern const char *scsi_driverbyte_string(int); #else static inline bool scsi_opcode_sa_name(int cmd, int sa, @@ -76,12 +75,6 @@ scsi_hostbyte_string(int result) return NULL; } -static inline const char * -scsi_driverbyte_string(int result) -{ - return NULL; -} - #endif #endif /* _SCSI_SCSI_DBG_H */ -- cgit v1.2.3 From c42a01264ba1497eb3193c08ff3c2656d98250a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 06:10:24 +0200 Subject: dma-mapping: don't return errors from dma_set_min_align_mask A NULL dev->dma_parms indicates either a bus that is not DMA capable or grave bug in the implementation of the bus code. There isn't much the driver can do in terms of error handling for either case, so just warn and continue as DMA operations will fail anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. 
Petersen --- include/linux/dma-mapping.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f693aafe221f..cfd6bafec3f9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -575,13 +575,12 @@ static inline unsigned int dma_get_min_align_mask(struct device *dev) return 0; } -static inline int dma_set_min_align_mask(struct device *dev, +static inline void dma_set_min_align_mask(struct device *dev, unsigned int min_align_mask) { if (WARN_ON_ONCE(!dev->dma_parms)) - return -EIO; + return; dev->dma_parms->min_align_mask = min_align_mask; - return 0; } #ifndef dma_get_cache_alignment -- cgit v1.2.3 From 560a861ab4174b42240157ab5cebe36b8c7bc418 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 06:06:28 +0200 Subject: dma-mapping: don't return errors from dma_set_seg_boundary A NULL dev->dma_parms indicates either a bus that is not DMA capable or grave bug in the implementation of the bus code. There isn't much the driver can do in terms of error handling for either case, so just warn and continue as DMA operations will fail anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. Petersen --- include/linux/dma-mapping.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index cfd6bafec3f9..6bd1333dbacb 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -559,13 +559,11 @@ static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, return (dma_get_seg_boundary(dev) >> page_shift) + 1; } -static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) +static inline void dma_set_seg_boundary(struct device *dev, unsigned long mask) { - if (dev->dma_parms) { - dev->dma_parms->segment_boundary_mask = mask; - return 0; - } - return -EIO; + if (WARN_ON_ONCE(!dev->dma_parms)) + return; + dev->dma_parms->segment_boundary_mask = mask; } static inline unsigned int dma_get_min_align_mask(struct device *dev) -- cgit v1.2.3 From 334304ac2baca7f3e821c47cf5129d90e7a6b1e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 06:07:38 +0200 Subject: dma-mapping: don't return errors from dma_set_max_seg_size A NULL dev->dma_parms indicates either a bus that is not DMA capable or grave bug in the implementation of the bus code. There isn't much the driver can do in terms of error handling for either case, so just warn and continue as DMA operations will fail anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. 
Petersen Acked-by: Ulf Hansson # For MMC --- include/linux/dma-mapping.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 6bd1333dbacb..1524da363734 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -524,13 +524,11 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) +static inline void dma_set_max_seg_size(struct device *dev, unsigned int size) { - if (dev->dma_parms) { - dev->dma_parms->max_segment_size = size; - return 0; - } - return -EIO; + if (WARN_ON_ONCE(!dev->dma_parms)) + return; + dev->dma_parms->max_segment_size = size; } static inline unsigned long dma_get_seg_boundary(struct device *dev) -- cgit v1.2.3 From b4f62001ccd3fa953769ccbd313c9a7a4f5f8f3d Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 29 Aug 2024 09:18:47 +0800 Subject: dt-bindings: clock: add RMII clock selection Add RMII clock selection for ENETC0 and ENETC1. Signed-off-by: Wei Fang Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240829011849.364987-3-wei.fang@nxp.com Signed-off-by: Abel Vesa --- include/dt-bindings/clock/nxp,imx95-clock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/nxp,imx95-clock.h b/include/dt-bindings/clock/nxp,imx95-clock.h index 782662c3e740..b7a713a9ac8c 100644 --- a/include/dt-bindings/clock/nxp,imx95-clock.h +++ b/include/dt-bindings/clock/nxp,imx95-clock.h @@ -25,4 +25,7 @@ #define IMX95_CLK_DISPMIX_ENG0_SEL 0 #define IMX95_CLK_DISPMIX_ENG1_SEL 1 +#define IMX95_CLK_NETCMIX_ENETC0_RMII 0 +#define IMX95_CLK_NETCMIX_ENETC1_RMII 1 + #endif /* __DT_BINDINGS_CLOCK_IMX95_H */ -- cgit v1.2.3 From 33929707b808ba7839c40c15d3e68cbc51070b31 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 15:23:10 +0300 Subject: drm/mst: switch to guid_t type for GUID The kernel has a guid_t type for GUIDs. Switch to using it, but avoid any functional changes here. 
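The practical benefit of guid_t is that the generic helpers from include/linux/uuid.h replace open-coded memcpy()/memcmp() on raw u8[16] buffers. A hedged sketch follows; the variable names are illustrative and not taken from the MST helpers.

/* Illustrative only: shows the uuid.h helpers the guid_t conversion enables. */
#include <linux/uuid.h>

static bool example_update_guid(guid_t *stored, const u8 raw[16])
{
	guid_t incoming;

	import_guid(&incoming, raw);		/* raw 16-byte sideband buffer -> guid_t */

	if (guid_equal(stored, &incoming))	/* instead of memcmp(..., 16) */
		return false;

	guid_copy(stored, &incoming);		/* instead of memcpy(..., 16) */
	return true;				/* stored GUID was updated */
}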
Reviewed-by: Daniel Vetter Acked-by: Harry Wentland Acked-by: Alex Deucher Acked-by: Hamza Mahfooz Link: https://patchwork.freedesktop.org/patch/msgid/20240812122312.1567046-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/display/drm_dp_mst_helper.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index cfe096389d94..dd466631f174 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -244,18 +244,18 @@ struct drm_dp_mst_branch { bool link_address_sent; /* global unique identifier to identify branch devices */ - u8 guid[16]; + guid_t guid; }; struct drm_dp_nak_reply { - u8 guid[16]; + guid_t guid; u8 reason; u8 nak_data; }; struct drm_dp_link_address_ack_reply { - u8 guid[16]; + guid_t guid; u8 nports; struct drm_dp_link_addr_reply_port { bool input_port; @@ -265,7 +265,7 @@ struct drm_dp_link_address_ack_reply { bool ddps; bool legacy_device_plug_status; u8 dpcd_revision; - u8 peer_guid[16]; + guid_t peer_guid; u8 num_sdp_streams; u8 num_sdp_stream_sinks; } ports[16]; @@ -348,7 +348,7 @@ struct drm_dp_allocate_payload_ack_reply { }; struct drm_dp_connection_status_notify { - u8 guid[16]; + guid_t guid; u8 port_number; bool legacy_device_plug_status; bool displayport_device_plug_status; @@ -425,7 +425,7 @@ struct drm_dp_query_payload { struct drm_dp_resource_status_notify { u8 port_number; - u8 guid[16]; + guid_t guid; u16 available_pbn; }; -- cgit v1.2.3 From 49c04453db81fc806906e26ef9fc53bdb635ff39 Mon Sep 17 00:00:00 2001 From: Detlev Casanova Date: Wed, 28 Aug 2024 15:42:50 +0000 Subject: dt-bindings: clock, reset: Add support for rk3576 Add clock and reset ID defines for rk3576. Compared to the downstream bindings written by Elaine, this uses continous gapless IDs starting at 0. Thus all numbers are different between downstream and upstream, but names are kept exactly the same. Also add documentation for the rk3576 CRU core. Signed-off-by: Elaine Zhang Signed-off-by: Sugar Zhang Signed-off-by: Detlev Casanova Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/0102019199a76766-f3a2b53f-d063-458b-b0d1-dfbc2ea1893c-000000@eu-west-1.amazonses.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rockchip,rk3576-cru.h | 592 ++++++++++++++++++++++++ include/dt-bindings/reset/rockchip,rk3576-cru.h | 564 ++++++++++++++++++++++ 2 files changed, 1156 insertions(+) create mode 100644 include/dt-bindings/clock/rockchip,rk3576-cru.h create mode 100644 include/dt-bindings/reset/rockchip,rk3576-cru.h (limited to 'include') diff --git a/include/dt-bindings/clock/rockchip,rk3576-cru.h b/include/dt-bindings/clock/rockchip,rk3576-cru.h new file mode 100644 index 000000000000..a2933021be89 --- /dev/null +++ b/include/dt-bindings/clock/rockchip,rk3576-cru.h @@ -0,0 +1,592 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* +* Copyright (c) 2023 Rockchip Electronics Co. Ltd. +* Copyright (c) 2024 Collabora Ltd. 
+* +* Author: Elaine Zhang +* Author: Detlev Casanova +*/ + +#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK3576_H +#define _DT_BINDINGS_CLK_ROCKCHIP_RK3576_H + +/* cru-clocks indices */ + +/* cru plls */ +#define PLL_BPLL 0 +#define PLL_LPLL 1 +#define PLL_VPLL 2 +#define PLL_AUPLL 3 +#define PLL_CPLL 4 +#define PLL_GPLL 5 +#define PLL_PPLL 6 +#define ARMCLK_L 7 +#define ARMCLK_B 8 + +/* cru clocks */ +#define CLK_CPLL_DIV20 9 +#define CLK_CPLL_DIV10 10 +#define CLK_GPLL_DIV8 11 +#define CLK_GPLL_DIV6 12 +#define CLK_CPLL_DIV4 13 +#define CLK_GPLL_DIV4 14 +#define CLK_SPLL_DIV2 15 +#define CLK_GPLL_DIV3 16 +#define CLK_CPLL_DIV2 17 +#define CLK_GPLL_DIV2 18 +#define CLK_SPLL_DIV1 19 +#define PCLK_TOP_ROOT 20 +#define ACLK_TOP 21 +#define HCLK_TOP 22 +#define CLK_AUDIO_FRAC_0 23 +#define CLK_AUDIO_FRAC_1 24 +#define CLK_AUDIO_FRAC_2 25 +#define CLK_AUDIO_FRAC_3 26 +#define CLK_UART_FRAC_0 27 +#define CLK_UART_FRAC_1 28 +#define CLK_UART_FRAC_2 29 +#define CLK_UART1_SRC_TOP 30 +#define CLK_AUDIO_INT_0 31 +#define CLK_AUDIO_INT_1 32 +#define CLK_AUDIO_INT_2 33 +#define CLK_PDM0_SRC_TOP 34 +#define CLK_PDM1_OUT 35 +#define CLK_GMAC0_125M_SRC 36 +#define CLK_GMAC1_125M_SRC 37 +#define LCLK_ASRC_SRC_0 38 +#define LCLK_ASRC_SRC_1 39 +#define REF_CLK0_OUT_PLL 40 +#define REF_CLK1_OUT_PLL 41 +#define REF_CLK2_OUT_PLL 42 +#define REFCLKO25M_GMAC0_OUT 43 +#define REFCLKO25M_GMAC1_OUT 44 +#define CLK_CIFOUT_OUT 45 +#define CLK_GMAC0_RMII_CRU 46 +#define CLK_GMAC1_RMII_CRU 47 +#define CLK_OTPC_AUTO_RD_G 48 +#define CLK_OTP_PHY_G 49 +#define CLK_MIPI_CAMERAOUT_M0 50 +#define CLK_MIPI_CAMERAOUT_M1 51 +#define CLK_MIPI_CAMERAOUT_M2 52 +#define MCLK_PDM0_SRC_TOP 53 +#define HCLK_AUDIO_ROOT 54 +#define HCLK_ASRC_2CH_0 55 +#define HCLK_ASRC_2CH_1 56 +#define HCLK_ASRC_4CH_0 57 +#define HCLK_ASRC_4CH_1 58 +#define CLK_ASRC_2CH_0 59 +#define CLK_ASRC_2CH_1 60 +#define CLK_ASRC_4CH_0 61 +#define CLK_ASRC_4CH_1 62 +#define MCLK_SAI0_8CH_SRC 63 +#define MCLK_SAI0_8CH 64 +#define HCLK_SAI0_8CH 65 +#define HCLK_SPDIF_RX0 66 +#define MCLK_SPDIF_RX0 67 +#define HCLK_SPDIF_RX1 68 +#define MCLK_SPDIF_RX1 69 +#define MCLK_SAI1_8CH_SRC 70 +#define MCLK_SAI1_8CH 71 +#define HCLK_SAI1_8CH 72 +#define MCLK_SAI2_2CH_SRC 73 +#define MCLK_SAI2_2CH 74 +#define HCLK_SAI2_2CH 75 +#define MCLK_SAI3_2CH_SRC 76 +#define MCLK_SAI3_2CH 77 +#define HCLK_SAI3_2CH 78 +#define MCLK_SAI4_2CH_SRC 79 +#define MCLK_SAI4_2CH 80 +#define HCLK_SAI4_2CH 81 +#define HCLK_ACDCDIG_DSM 82 +#define MCLK_ACDCDIG_DSM 83 +#define CLK_PDM1 84 +#define HCLK_PDM1 85 +#define MCLK_PDM1 86 +#define HCLK_SPDIF_TX0 87 +#define MCLK_SPDIF_TX0 88 +#define HCLK_SPDIF_TX1 89 +#define MCLK_SPDIF_TX1 90 +#define CLK_SAI1_MCLKOUT 91 +#define CLK_SAI2_MCLKOUT 92 +#define CLK_SAI3_MCLKOUT 93 +#define CLK_SAI4_MCLKOUT 94 +#define CLK_SAI0_MCLKOUT 95 +#define HCLK_BUS_ROOT 96 +#define PCLK_BUS_ROOT 97 +#define ACLK_BUS_ROOT 98 +#define HCLK_CAN0 99 +#define CLK_CAN0 100 +#define HCLK_CAN1 101 +#define CLK_CAN1 102 +#define CLK_KEY_SHIFT 103 +#define PCLK_I2C1 104 +#define PCLK_I2C2 105 +#define PCLK_I2C3 106 +#define PCLK_I2C4 107 +#define PCLK_I2C5 108 +#define PCLK_I2C6 109 +#define PCLK_I2C7 110 +#define PCLK_I2C8 111 +#define PCLK_I2C9 112 +#define PCLK_WDT_BUSMCU 113 +#define TCLK_WDT_BUSMCU 114 +#define ACLK_GIC 115 +#define CLK_I2C1 116 +#define CLK_I2C2 117 +#define CLK_I2C3 118 +#define CLK_I2C4 119 +#define CLK_I2C5 120 +#define CLK_I2C6 121 +#define CLK_I2C7 122 +#define CLK_I2C8 123 +#define CLK_I2C9 124 +#define PCLK_SARADC 125 +#define CLK_SARADC 126 +#define 
PCLK_TSADC 127 +#define CLK_TSADC 128 +#define PCLK_UART0 129 +#define PCLK_UART2 130 +#define PCLK_UART3 131 +#define PCLK_UART4 132 +#define PCLK_UART5 133 +#define PCLK_UART6 134 +#define PCLK_UART7 135 +#define PCLK_UART8 136 +#define PCLK_UART9 137 +#define PCLK_UART10 138 +#define PCLK_UART11 139 +#define SCLK_UART0 140 +#define SCLK_UART2 141 +#define SCLK_UART3 142 +#define SCLK_UART4 143 +#define SCLK_UART5 144 +#define SCLK_UART6 145 +#define SCLK_UART7 146 +#define SCLK_UART8 147 +#define SCLK_UART9 148 +#define SCLK_UART10 149 +#define SCLK_UART11 150 +#define PCLK_SPI0 151 +#define PCLK_SPI1 152 +#define PCLK_SPI2 153 +#define PCLK_SPI3 154 +#define PCLK_SPI4 155 +#define CLK_SPI0 156 +#define CLK_SPI1 157 +#define CLK_SPI2 158 +#define CLK_SPI3 159 +#define CLK_SPI4 160 +#define PCLK_WDT0 161 +#define TCLK_WDT0 162 +#define PCLK_PWM1 163 +#define CLK_PWM1 164 +#define CLK_OSC_PWM1 165 +#define CLK_RC_PWM1 166 +#define PCLK_BUSTIMER0 167 +#define PCLK_BUSTIMER1 168 +#define CLK_TIMER0_ROOT 169 +#define CLK_TIMER0 170 +#define CLK_TIMER1 171 +#define CLK_TIMER2 172 +#define CLK_TIMER3 173 +#define CLK_TIMER4 174 +#define CLK_TIMER5 175 +#define PCLK_MAILBOX0 176 +#define PCLK_GPIO1 177 +#define DBCLK_GPIO1 178 +#define PCLK_GPIO2 179 +#define DBCLK_GPIO2 180 +#define PCLK_GPIO3 181 +#define DBCLK_GPIO3 182 +#define PCLK_GPIO4 183 +#define DBCLK_GPIO4 184 +#define ACLK_DECOM 185 +#define PCLK_DECOM 186 +#define DCLK_DECOM 187 +#define CLK_TIMER1_ROOT 188 +#define CLK_TIMER6 189 +#define CLK_TIMER7 190 +#define CLK_TIMER8 191 +#define CLK_TIMER9 192 +#define CLK_TIMER10 193 +#define CLK_TIMER11 194 +#define ACLK_DMAC0 195 +#define ACLK_DMAC1 196 +#define ACLK_DMAC2 197 +#define ACLK_SPINLOCK 198 +#define HCLK_I3C0 199 +#define HCLK_I3C1 200 +#define HCLK_BUS_CM0_ROOT 201 +#define FCLK_BUS_CM0_CORE 202 +#define CLK_BUS_CM0_RTC 203 +#define PCLK_PMU2 204 +#define PCLK_PWM2 205 +#define CLK_PWM2 206 +#define CLK_RC_PWM2 207 +#define CLK_OSC_PWM2 208 +#define CLK_FREQ_PWM1 209 +#define CLK_COUNTER_PWM1 210 +#define SAI_SCLKIN_FREQ 211 +#define SAI_SCLKIN_COUNTER 212 +#define CLK_I3C0 213 +#define CLK_I3C1 214 +#define PCLK_CSIDPHY1 215 +#define PCLK_DDR_ROOT 216 +#define PCLK_DDR_MON_CH0 217 +#define TMCLK_DDR_MON_CH0 218 +#define ACLK_DDR_ROOT 219 +#define HCLK_DDR_ROOT 220 +#define FCLK_DDR_CM0_CORE 221 +#define CLK_DDR_TIMER_ROOT 222 +#define CLK_DDR_TIMER0 223 +#define CLK_DDR_TIMER1 224 +#define TCLK_WDT_DDR 225 +#define PCLK_WDT 226 +#define PCLK_TIMER 227 +#define CLK_DDR_CM0_RTC 228 +#define ACLK_RKNN0 229 +#define ACLK_RKNN1 230 +#define HCLK_RKNN_ROOT 231 +#define CLK_RKNN_DSU0 232 +#define PCLK_NPUTOP_ROOT 233 +#define PCLK_NPU_TIMER 234 +#define CLK_NPUTIMER_ROOT 235 +#define CLK_NPUTIMER0 236 +#define CLK_NPUTIMER1 237 +#define PCLK_NPU_WDT 238 +#define TCLK_NPU_WDT 239 +#define ACLK_RKNN_CBUF 240 +#define HCLK_NPU_CM0_ROOT 241 +#define FCLK_NPU_CM0_CORE 242 +#define CLK_NPU_CM0_RTC 243 +#define HCLK_RKNN_CBUF 244 +#define HCLK_NVM_ROOT 245 +#define ACLK_NVM_ROOT 246 +#define SCLK_FSPI_X2 247 +#define HCLK_FSPI 248 +#define CCLK_SRC_EMMC 249 +#define HCLK_EMMC 250 +#define ACLK_EMMC 251 +#define BCLK_EMMC 252 +#define TCLK_EMMC 253 +#define PCLK_PHP_ROOT 254 +#define ACLK_PHP_ROOT 255 +#define PCLK_PCIE0 256 +#define CLK_PCIE0_AUX 257 +#define ACLK_PCIE0_MST 258 +#define ACLK_PCIE0_SLV 259 +#define ACLK_PCIE0_DBI 260 +#define ACLK_USB3OTG1 261 +#define CLK_REF_USB3OTG1 262 +#define CLK_SUSPEND_USB3OTG1 263 +#define ACLK_MMU0 264 +#define ACLK_SLV_MMU0 265 +#define 
ACLK_MMU1 266 +#define ACLK_SLV_MMU1 267 +#define PCLK_PCIE1 268 +#define CLK_PCIE1_AUX 269 +#define ACLK_PCIE1_MST 270 +#define ACLK_PCIE1_SLV 271 +#define ACLK_PCIE1_DBI 272 +#define CLK_RXOOB0 273 +#define CLK_RXOOB1 274 +#define CLK_PMALIVE0 275 +#define CLK_PMALIVE1 276 +#define ACLK_SATA0 277 +#define ACLK_SATA1 278 +#define CLK_USB3OTG1_PIPE_PCLK 279 +#define CLK_USB3OTG1_UTMI 280 +#define CLK_USB3OTG0_PIPE_PCLK 281 +#define CLK_USB3OTG0_UTMI 282 +#define HCLK_SDGMAC_ROOT 283 +#define ACLK_SDGMAC_ROOT 284 +#define PCLK_SDGMAC_ROOT 285 +#define ACLK_GMAC0 286 +#define ACLK_GMAC1 287 +#define PCLK_GMAC0 288 +#define PCLK_GMAC1 289 +#define CCLK_SRC_SDIO 290 +#define HCLK_SDIO 291 +#define CLK_GMAC1_PTP_REF 292 +#define CLK_GMAC0_PTP_REF 293 +#define CLK_GMAC1_PTP_REF_SRC 294 +#define CLK_GMAC0_PTP_REF_SRC 295 +#define CCLK_SRC_SDMMC0 296 +#define HCLK_SDMMC0 297 +#define SCLK_FSPI1_X2 298 +#define HCLK_FSPI1 299 +#define ACLK_DSMC_ROOT 300 +#define ACLK_DSMC 301 +#define PCLK_DSMC 302 +#define CLK_DSMC_SYS 303 +#define HCLK_HSGPIO 304 +#define CLK_HSGPIO_TX 305 +#define CLK_HSGPIO_RX 306 +#define ACLK_HSGPIO 307 +#define PCLK_PHPPHY_ROOT 308 +#define PCLK_PCIE2_COMBOPHY0 309 +#define PCLK_PCIE2_COMBOPHY1 310 +#define CLK_PCIE_100M_SRC 311 +#define CLK_PCIE_100M_NDUTY_SRC 312 +#define CLK_REF_PCIE0_PHY 313 +#define CLK_REF_PCIE1_PHY 314 +#define CLK_REF_MPHY_26M 315 +#define HCLK_RKVDEC_ROOT 316 +#define ACLK_RKVDEC_ROOT 317 +#define HCLK_RKVDEC 318 +#define CLK_RKVDEC_HEVC_CA 319 +#define CLK_RKVDEC_CORE 320 +#define ACLK_UFS_ROOT 321 +#define ACLK_USB_ROOT 322 +#define PCLK_USB_ROOT 323 +#define ACLK_USB3OTG0 324 +#define CLK_REF_USB3OTG0 325 +#define CLK_SUSPEND_USB3OTG0 326 +#define ACLK_MMU2 327 +#define ACLK_SLV_MMU2 328 +#define ACLK_UFS_SYS 329 +#define ACLK_VPU_ROOT 330 +#define ACLK_VPU_MID_ROOT 331 +#define HCLK_VPU_ROOT 332 +#define ACLK_JPEG_ROOT 333 +#define ACLK_VPU_LOW_ROOT 334 +#define HCLK_RGA2E_0 335 +#define ACLK_RGA2E_0 336 +#define CLK_CORE_RGA2E_0 337 +#define ACLK_JPEG 338 +#define HCLK_JPEG 339 +#define HCLK_VDPP 340 +#define ACLK_VDPP 341 +#define CLK_CORE_VDPP 342 +#define HCLK_RGA2E_1 343 +#define ACLK_RGA2E_1 344 +#define CLK_CORE_RGA2E_1 345 +#define DCLK_EBC_FRAC_SRC 346 +#define HCLK_EBC 347 +#define ACLK_EBC 348 +#define DCLK_EBC 349 +#define HCLK_VEPU0_ROOT 350 +#define ACLK_VEPU0_ROOT 351 +#define HCLK_VEPU0 352 +#define ACLK_VEPU0 353 +#define CLK_VEPU0_CORE 354 +#define ACLK_VI_ROOT 355 +#define HCLK_VI_ROOT 356 +#define PCLK_VI_ROOT 357 +#define DCLK_VICAP 358 +#define ACLK_VICAP 359 +#define HCLK_VICAP 360 +#define CLK_ISP_CORE 361 +#define CLK_ISP_CORE_MARVIN 362 +#define CLK_ISP_CORE_VICAP 363 +#define ACLK_ISP 364 +#define HCLK_ISP 365 +#define ACLK_VPSS 366 +#define HCLK_VPSS 367 +#define CLK_CORE_VPSS 368 +#define PCLK_CSI_HOST_0 369 +#define PCLK_CSI_HOST_1 370 +#define PCLK_CSI_HOST_2 371 +#define PCLK_CSI_HOST_3 372 +#define PCLK_CSI_HOST_4 373 +#define ICLK_CSIHOST01 374 +#define ICLK_CSIHOST0 375 +#define CLK_ISP_PVTPLL_SRC 376 +#define ACLK_VI_ROOT_INTER 377 +#define CLK_VICAP_I0CLK 378 +#define CLK_VICAP_I1CLK 379 +#define CLK_VICAP_I2CLK 380 +#define CLK_VICAP_I3CLK 381 +#define CLK_VICAP_I4CLK 382 +#define ACLK_VOP_ROOT 383 +#define HCLK_VOP_ROOT 384 +#define PCLK_VOP_ROOT 385 +#define HCLK_VOP 386 +#define ACLK_VOP 387 +#define DCLK_VP0_SRC 388 +#define DCLK_VP1_SRC 389 +#define DCLK_VP2_SRC 390 +#define DCLK_VP0 391 +#define DCLK_VP1 392 +#define DCLK_VP2 393 +#define PCLK_VOPGRF 394 +#define ACLK_VO0_ROOT 395 +#define 
HCLK_VO0_ROOT 396 +#define PCLK_VO0_ROOT 397 +#define PCLK_VO0_GRF 398 +#define ACLK_HDCP0 399 +#define HCLK_HDCP0 400 +#define PCLK_HDCP0 401 +#define CLK_TRNG0_SKP 402 +#define PCLK_DSIHOST0 403 +#define CLK_DSIHOST0 404 +#define PCLK_HDMITX0 405 +#define CLK_HDMITX0_EARC 406 +#define CLK_HDMITX0_REF 407 +#define PCLK_EDP0 408 +#define CLK_EDP0_24M 409 +#define CLK_EDP0_200M 410 +#define MCLK_SAI5_8CH_SRC 411 +#define MCLK_SAI5_8CH 412 +#define HCLK_SAI5_8CH 413 +#define MCLK_SAI6_8CH_SRC 414 +#define MCLK_SAI6_8CH 415 +#define HCLK_SAI6_8CH 416 +#define HCLK_SPDIF_TX2 417 +#define MCLK_SPDIF_TX2 418 +#define HCLK_SPDIF_RX2 419 +#define MCLK_SPDIF_RX2 420 +#define HCLK_SAI8_8CH 421 +#define MCLK_SAI8_8CH_SRC 422 +#define MCLK_SAI8_8CH 423 +#define ACLK_VO1_ROOT 424 +#define HCLK_VO1_ROOT 425 +#define PCLK_VO1_ROOT 426 +#define MCLK_SAI7_8CH_SRC 427 +#define MCLK_SAI7_8CH 428 +#define HCLK_SAI7_8CH 429 +#define HCLK_SPDIF_TX3 430 +#define HCLK_SPDIF_TX4 431 +#define HCLK_SPDIF_TX5 432 +#define MCLK_SPDIF_TX3 433 +#define CLK_AUX16MHZ_0 434 +#define ACLK_DP0 435 +#define PCLK_DP0 436 +#define PCLK_VO1_GRF 437 +#define ACLK_HDCP1 438 +#define HCLK_HDCP1 439 +#define PCLK_HDCP1 440 +#define CLK_TRNG1_SKP 441 +#define HCLK_SAI9_8CH 442 +#define MCLK_SAI9_8CH_SRC 443 +#define MCLK_SAI9_8CH 444 +#define MCLK_SPDIF_TX4 445 +#define MCLK_SPDIF_TX5 446 +#define CLK_GPU_SRC_PRE 447 +#define CLK_GPU 448 +#define PCLK_GPU_ROOT 449 +#define ACLK_CENTER_ROOT 450 +#define ACLK_CENTER_LOW_ROOT 451 +#define HCLK_CENTER_ROOT 452 +#define PCLK_CENTER_ROOT 453 +#define ACLK_DMA2DDR 454 +#define ACLK_DDR_SHAREMEM 455 +#define PCLK_DMA2DDR 456 +#define PCLK_SHAREMEM 457 +#define HCLK_VEPU1_ROOT 458 +#define ACLK_VEPU1_ROOT 459 +#define HCLK_VEPU1 460 +#define ACLK_VEPU1 461 +#define CLK_VEPU1_CORE 462 +#define CLK_JDBCK_DAP 463 +#define PCLK_MIPI_DCPHY 464 +#define CLK_32K_USB2DEBUG 465 +#define PCLK_CSIDPHY 466 +#define PCLK_USBDPPHY 467 +#define CLK_PMUPHY_REF_SRC 468 +#define CLK_USBDP_COMBO_PHY_IMMORTAL 469 +#define CLK_HDMITXHDP 470 +#define PCLK_MPHY 471 +#define CLK_REF_OSC_MPHY 472 +#define CLK_REF_UFS_CLKOUT 473 +#define HCLK_PMU1_ROOT 474 +#define HCLK_PMU_CM0_ROOT 475 +#define CLK_200M_PMU_SRC 476 +#define CLK_100M_PMU_SRC 477 +#define CLK_50M_PMU_SRC 478 +#define FCLK_PMU_CM0_CORE 479 +#define CLK_PMU_CM0_RTC 480 +#define PCLK_PMU1 481 +#define CLK_PMU1 482 +#define PCLK_PMU1WDT 483 +#define TCLK_PMU1WDT 484 +#define PCLK_PMUTIMER 485 +#define CLK_PMUTIMER_ROOT 486 +#define CLK_PMUTIMER0 487 +#define CLK_PMUTIMER1 488 +#define PCLK_PMU1PWM 489 +#define CLK_PMU1PWM 490 +#define CLK_PMU1PWM_OSC 491 +#define PCLK_PMUPHY_ROOT 492 +#define PCLK_I2C0 493 +#define CLK_I2C0 494 +#define SCLK_UART1 495 +#define PCLK_UART1 496 +#define CLK_PMU1PWM_RC 497 +#define CLK_PDM0 498 +#define HCLK_PDM0 499 +#define MCLK_PDM0 500 +#define HCLK_VAD 501 +#define CLK_OSCCHK_PVTM 502 +#define CLK_PDM0_OUT 503 +#define CLK_HPTIMER_SRC 504 +#define PCLK_PMU0_ROOT 505 +#define PCLK_PMU0 506 +#define PCLK_GPIO0 507 +#define DBCLK_GPIO0 508 +#define CLK_OSC0_PMU1 509 +#define PCLK_PMU1_ROOT 510 +#define XIN_OSC0_DIV 511 +#define ACLK_USB 512 +#define ACLK_UFS 513 +#define ACLK_SDGMAC 514 +#define HCLK_SDGMAC 515 +#define PCLK_SDGMAC 516 +#define HCLK_VO1 517 +#define HCLK_VO0 518 +#define PCLK_CCI_ROOT 519 +#define ACLK_CCI_ROOT 520 +#define HCLK_VO0VOP_CHANNEL 521 +#define ACLK_VO0VOP_CHANNEL 522 +#define ACLK_TOP_MID 523 +#define ACLK_SECURE_HIGH 524 +#define CLK_USBPHY_REF_SRC 525 +#define CLK_PHY_REF_SRC 526 +#define 
CLK_CPLL_REF_SRC 527 +#define CLK_AUPLL_REF_SRC 528 +#define PCLK_SECURE_NS 529 +#define HCLK_SECURE_NS 530 +#define ACLK_SECURE_NS 531 +#define PCLK_OTPC_NS 532 +#define HCLK_CRYPTO_NS 533 +#define HCLK_TRNG_NS 534 +#define CLK_OTPC_NS 535 +#define SCLK_DSU 536 +#define SCLK_DDR 537 +#define ACLK_CRYPTO_NS 538 +#define CLK_PKA_CRYPTO_NS 539 +#define ACLK_RKVDEC_ROOT_BAK 540 +#define CLK_AUDIO_FRAC_0_SRC 541 +#define CLK_AUDIO_FRAC_1_SRC 542 +#define CLK_AUDIO_FRAC_2_SRC 543 +#define CLK_AUDIO_FRAC_3_SRC 544 +#define PCLK_HDPTX_APB 545 + +/* secure clk */ +#define CLK_STIMER0_ROOT 546 +#define CLK_STIMER1_ROOT 547 +#define PCLK_SECURE_S 548 +#define HCLK_SECURE_S 549 +#define ACLK_SECURE_S 550 +#define CLK_PKA_CRYPTO_S 551 +#define HCLK_VO1_S 552 +#define PCLK_VO1_S 553 +#define HCLK_VO0_S 554 +#define PCLK_VO0_S 555 +#define PCLK_KLAD 556 +#define HCLK_CRYPTO_S 557 +#define HCLK_KLAD 558 +#define ACLK_CRYPTO_S 559 +#define HCLK_TRNG_S 560 +#define PCLK_OTPC_S 561 +#define CLK_OTPC_S 562 +#define PCLK_WDT_S 563 +#define TCLK_WDT_S 564 +#define PCLK_HDCP0_TRNG 565 +#define PCLK_HDCP1_TRNG 566 +#define HCLK_HDCP_KEY0 567 +#define HCLK_HDCP_KEY1 568 +#define PCLK_EDP_S 569 +#define ACLK_KLAD 570 + +#endif diff --git a/include/dt-bindings/reset/rockchip,rk3576-cru.h b/include/dt-bindings/reset/rockchip,rk3576-cru.h new file mode 100644 index 000000000000..291fec0ecba0 --- /dev/null +++ b/include/dt-bindings/reset/rockchip,rk3576-cru.h @@ -0,0 +1,564 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* +* Copyright (c) 2023 Rockchip Electronics Co. Ltd. +* Copyright (c) 2024 Collabora Ltd. +* +* Author: Elaine Zhang +* Author: Detlev Casanova +*/ + +#ifndef _DT_BINDINGS_RESET_ROCKCHIP_RK3576_H +#define _DT_BINDINGS_RESET_ROCKCHIP_RK3576_H + +#define SRST_A_TOP_BIU 0 +#define SRST_P_TOP_BIU 1 +#define SRST_A_TOP_MID_BIU 2 +#define SRST_A_SECURE_HIGH_BIU 3 +#define SRST_H_TOP_BIU 4 + +#define SRST_H_VO0VOP_CHANNEL_BIU 5 +#define SRST_A_VO0VOP_CHANNEL_BIU 6 + +#define SRST_BISRINTF 7 + +#define SRST_H_AUDIO_BIU 8 +#define SRST_H_ASRC_2CH_0 9 +#define SRST_H_ASRC_2CH_1 10 +#define SRST_H_ASRC_4CH_0 11 +#define SRST_H_ASRC_4CH_1 12 +#define SRST_ASRC_2CH_0 13 +#define SRST_ASRC_2CH_1 14 +#define SRST_ASRC_4CH_0 15 +#define SRST_ASRC_4CH_1 16 +#define SRST_M_SAI0_8CH 17 +#define SRST_H_SAI0_8CH 18 +#define SRST_H_SPDIF_RX0 19 +#define SRST_M_SPDIF_RX0 20 + +#define SRST_H_SPDIF_RX1 21 +#define SRST_M_SPDIF_RX1 22 +#define SRST_M_SAI1_8CH 23 +#define SRST_H_SAI1_8CH 24 +#define SRST_M_SAI2_2CH 25 +#define SRST_H_SAI2_2CH 26 +#define SRST_M_SAI3_2CH 27 +#define SRST_H_SAI3_2CH 28 + +#define SRST_M_SAI4_2CH 29 +#define SRST_H_SAI4_2CH 30 +#define SRST_H_ACDCDIG_DSM 31 +#define SRST_M_ACDCDIG_DSM 32 +#define SRST_PDM1 33 +#define SRST_H_PDM1 34 +#define SRST_M_PDM1 35 +#define SRST_H_SPDIF_TX0 36 +#define SRST_M_SPDIF_TX0 37 +#define SRST_H_SPDIF_TX1 38 +#define SRST_M_SPDIF_TX1 39 + +#define SRST_A_BUS_BIU 40 +#define SRST_P_BUS_BIU 41 +#define SRST_P_CRU 42 +#define SRST_H_CAN0 43 +#define SRST_CAN0 44 +#define SRST_H_CAN1 45 +#define SRST_CAN1 46 +#define SRST_P_INTMUX2BUS 47 +#define SRST_P_VCCIO_IOC 48 +#define SRST_H_BUS_BIU 49 +#define SRST_KEY_SHIFT 50 + +#define SRST_P_I2C1 51 +#define SRST_P_I2C2 52 +#define SRST_P_I2C3 53 +#define SRST_P_I2C4 54 +#define SRST_P_I2C5 55 +#define SRST_P_I2C6 56 +#define SRST_P_I2C7 57 +#define SRST_P_I2C8 58 +#define SRST_P_I2C9 59 +#define SRST_P_WDT_BUSMCU 60 +#define SRST_T_WDT_BUSMCU 61 +#define SRST_A_GIC 62 +#define SRST_I2C1 63 +#define SRST_I2C2 64 
+#define SRST_I2C3 65 +#define SRST_I2C4 66 + +#define SRST_I2C5 67 +#define SRST_I2C6 68 +#define SRST_I2C7 69 +#define SRST_I2C8 70 +#define SRST_I2C9 71 +#define SRST_P_SARADC 72 +#define SRST_SARADC 73 +#define SRST_P_TSADC 74 +#define SRST_TSADC 75 +#define SRST_P_UART0 76 +#define SRST_P_UART2 77 +#define SRST_P_UART3 78 +#define SRST_P_UART4 79 +#define SRST_P_UART5 80 +#define SRST_P_UART6 81 + +#define SRST_P_UART7 82 +#define SRST_P_UART8 83 +#define SRST_P_UART9 84 +#define SRST_P_UART10 85 +#define SRST_P_UART11 86 +#define SRST_S_UART0 87 +#define SRST_S_UART2 88 +#define SRST_S_UART3 89 +#define SRST_S_UART4 90 +#define SRST_S_UART5 91 + +#define SRST_S_UART6 92 +#define SRST_S_UART7 93 +#define SRST_S_UART8 94 +#define SRST_S_UART9 95 +#define SRST_S_UART10 96 +#define SRST_S_UART11 97 +#define SRST_P_SPI0 98 +#define SRST_P_SPI1 99 +#define SRST_P_SPI2 100 + +#define SRST_P_SPI3 101 +#define SRST_P_SPI4 102 +#define SRST_SPI0 103 +#define SRST_SPI1 104 +#define SRST_SPI2 105 +#define SRST_SPI3 106 +#define SRST_SPI4 107 +#define SRST_P_WDT0 108 +#define SRST_T_WDT0 109 +#define SRST_P_SYS_GRF 110 +#define SRST_P_PWM1 111 +#define SRST_PWM1 112 + +#define SRST_P_BUSTIMER0 113 +#define SRST_P_BUSTIMER1 114 +#define SRST_TIMER0 115 +#define SRST_TIMER1 116 +#define SRST_TIMER2 117 +#define SRST_TIMER3 118 +#define SRST_TIMER4 119 +#define SRST_TIMER5 120 +#define SRST_P_BUSIOC 121 +#define SRST_P_MAILBOX0 122 +#define SRST_P_GPIO1 123 + +#define SRST_GPIO1 124 +#define SRST_P_GPIO2 125 +#define SRST_GPIO2 126 +#define SRST_P_GPIO3 127 +#define SRST_GPIO3 128 +#define SRST_P_GPIO4 129 +#define SRST_GPIO4 130 +#define SRST_A_DECOM 131 +#define SRST_P_DECOM 132 +#define SRST_D_DECOM 133 +#define SRST_TIMER6 134 +#define SRST_TIMER7 135 +#define SRST_TIMER8 136 +#define SRST_TIMER9 137 +#define SRST_TIMER10 138 + +#define SRST_TIMER11 139 +#define SRST_A_DMAC0 140 +#define SRST_A_DMAC1 141 +#define SRST_A_DMAC2 142 +#define SRST_A_SPINLOCK 143 +#define SRST_REF_PVTPLL_BUS 144 +#define SRST_H_I3C0 145 +#define SRST_H_I3C1 146 +#define SRST_H_BUS_CM0_BIU 147 +#define SRST_F_BUS_CM0_CORE 148 +#define SRST_T_BUS_CM0_JTAG 149 + +#define SRST_P_INTMUX2PMU 150 +#define SRST_P_INTMUX2DDR 151 +#define SRST_P_PVTPLL_BUS 152 +#define SRST_P_PWM2 153 +#define SRST_PWM2 154 +#define SRST_FREQ_PWM1 155 +#define SRST_COUNTER_PWM1 156 +#define SRST_I3C0 157 +#define SRST_I3C1 158 + +#define SRST_P_DDR_MON_CH0 159 +#define SRST_P_DDR_BIU 160 +#define SRST_P_DDR_UPCTL_CH0 161 +#define SRST_TM_DDR_MON_CH0 162 +#define SRST_A_DDR_BIU 163 +#define SRST_DFI_CH0 164 +#define SRST_DDR_MON_CH0 165 +#define SRST_P_DDR_HWLP_CH0 166 +#define SRST_P_DDR_MON_CH1 167 +#define SRST_P_DDR_HWLP_CH1 168 + +#define SRST_P_DDR_UPCTL_CH1 169 +#define SRST_TM_DDR_MON_CH1 170 +#define SRST_DFI_CH1 171 +#define SRST_A_DDR01_MSCH0 172 +#define SRST_A_DDR01_MSCH1 173 +#define SRST_DDR_MON_CH1 174 +#define SRST_DDR_SCRAMBLE_CH0 175 +#define SRST_DDR_SCRAMBLE_CH1 176 +#define SRST_P_AHB2APB 177 +#define SRST_H_AHB2APB 178 +#define SRST_H_DDR_BIU 179 +#define SRST_F_DDR_CM0_CORE 180 + +#define SRST_P_DDR01_MSCH0 181 +#define SRST_P_DDR01_MSCH1 182 +#define SRST_DDR_TIMER0 183 +#define SRST_DDR_TIMER1 184 +#define SRST_T_WDT_DDR 185 +#define SRST_P_WDT 186 +#define SRST_P_TIMER 187 +#define SRST_T_DDR_CM0_JTAG 188 +#define SRST_P_DDR_GRF 189 + +#define SRST_DDR_UPCTL_CH0 190 +#define SRST_A_DDR_UPCTL_0_CH0 191 +#define SRST_A_DDR_UPCTL_1_CH0 192 +#define SRST_A_DDR_UPCTL_2_CH0 193 +#define SRST_A_DDR_UPCTL_3_CH0 194 +#define 
SRST_A_DDR_UPCTL_4_CH0 195 + +#define SRST_DDR_UPCTL_CH1 196 +#define SRST_A_DDR_UPCTL_0_CH1 197 +#define SRST_A_DDR_UPCTL_1_CH1 198 +#define SRST_A_DDR_UPCTL_2_CH1 199 +#define SRST_A_DDR_UPCTL_3_CH1 200 +#define SRST_A_DDR_UPCTL_4_CH1 201 + +#define SRST_REF_PVTPLL_DDR 202 +#define SRST_P_PVTPLL_DDR 203 + +#define SRST_A_RKNN0 204 +#define SRST_A_RKNN0_BIU 205 +#define SRST_L_RKNN0_BIU 206 + +#define SRST_A_RKNN1 207 +#define SRST_A_RKNN1_BIU 208 +#define SRST_L_RKNN1_BIU 209 + +#define SRST_NPU_DAP 210 +#define SRST_L_NPUSUBSYS_BIU 211 +#define SRST_P_NPUTOP_BIU 212 +#define SRST_P_NPU_TIMER 213 +#define SRST_NPUTIMER0 214 +#define SRST_NPUTIMER1 215 +#define SRST_P_NPU_WDT 216 +#define SRST_T_NPU_WDT 217 + +#define SRST_A_RKNN_CBUF 218 +#define SRST_A_RVCORE0 219 +#define SRST_P_NPU_GRF 220 +#define SRST_P_PVTPLL_NPU 221 +#define SRST_NPU_PVTPLL 222 +#define SRST_H_NPU_CM0_BIU 223 +#define SRST_F_NPU_CM0_CORE 224 +#define SRST_T_NPU_CM0_JTAG 225 +#define SRST_A_RKNNTOP_BIU 226 +#define SRST_H_RKNN_CBUF 227 +#define SRST_H_RKNNTOP_BIU 228 + +#define SRST_H_NVM_BIU 229 +#define SRST_A_NVM_BIU 230 +#define SRST_S_FSPI 231 +#define SRST_H_FSPI 232 +#define SRST_C_EMMC 233 +#define SRST_H_EMMC 234 +#define SRST_A_EMMC 235 +#define SRST_B_EMMC 236 +#define SRST_T_EMMC 237 + +#define SRST_P_GRF 238 +#define SRST_P_PHP_BIU 239 +#define SRST_A_PHP_BIU 240 +#define SRST_P_PCIE0 241 +#define SRST_PCIE0_POWER_UP 242 + +#define SRST_A_USB3OTG1 243 +#define SRST_A_MMU0 244 +#define SRST_A_SLV_MMU0 245 +#define SRST_A_MMU1 246 + +#define SRST_A_SLV_MMU1 247 +#define SRST_P_PCIE1 248 +#define SRST_PCIE1_POWER_UP 249 + +#define SRST_RXOOB0 250 +#define SRST_RXOOB1 251 +#define SRST_PMALIVE0 252 +#define SRST_PMALIVE1 253 +#define SRST_A_SATA0 254 +#define SRST_A_SATA1 255 +#define SRST_ASIC1 256 +#define SRST_ASIC0 257 + +#define SRST_P_CSIDPHY1 258 +#define SRST_SCAN_CSIDPHY1 259 + +#define SRST_P_SDGMAC_GRF 260 +#define SRST_P_SDGMAC_BIU 261 +#define SRST_A_SDGMAC_BIU 262 +#define SRST_H_SDGMAC_BIU 263 +#define SRST_A_GMAC0 264 +#define SRST_A_GMAC1 265 +#define SRST_P_GMAC0 266 +#define SRST_P_GMAC1 267 +#define SRST_H_SDIO 268 + +#define SRST_H_SDMMC0 269 +#define SRST_S_FSPI1 270 +#define SRST_H_FSPI1 271 +#define SRST_A_DSMC_BIU 272 +#define SRST_A_DSMC 273 +#define SRST_P_DSMC 274 +#define SRST_H_HSGPIO 275 +#define SRST_HSGPIO 276 +#define SRST_A_HSGPIO 277 + +#define SRST_H_RKVDEC 278 +#define SRST_H_RKVDEC_BIU 279 +#define SRST_A_RKVDEC_BIU 280 +#define SRST_RKVDEC_HEVC_CA 281 +#define SRST_RKVDEC_CORE 282 + +#define SRST_A_USB_BIU 283 +#define SRST_P_USBUFS_BIU 284 +#define SRST_A_USB3OTG0 285 +#define SRST_A_UFS_BIU 286 +#define SRST_A_MMU2 287 +#define SRST_A_SLV_MMU2 288 +#define SRST_A_UFS_SYS 289 + +#define SRST_A_UFS 290 +#define SRST_P_USBUFS_GRF 291 +#define SRST_P_UFS_GRF 292 + +#define SRST_H_VPU_BIU 293 +#define SRST_A_JPEG_BIU 294 +#define SRST_A_RGA_BIU 295 +#define SRST_A_VDPP_BIU 296 +#define SRST_A_EBC_BIU 297 +#define SRST_H_RGA2E_0 298 +#define SRST_A_RGA2E_0 299 +#define SRST_CORE_RGA2E_0 300 + +#define SRST_A_JPEG 301 +#define SRST_H_JPEG 302 +#define SRST_H_VDPP 303 +#define SRST_A_VDPP 304 +#define SRST_CORE_VDPP 305 +#define SRST_H_RGA2E_1 306 +#define SRST_A_RGA2E_1 307 +#define SRST_CORE_RGA2E_1 308 +#define SRST_H_EBC 309 +#define SRST_A_EBC 310 +#define SRST_D_EBC 311 + +#define SRST_H_VEPU0_BIU 312 +#define SRST_A_VEPU0_BIU 313 +#define SRST_H_VEPU0 314 +#define SRST_A_VEPU0 315 +#define SRST_VEPU0_CORE 316 + +#define SRST_A_VI_BIU 317 +#define SRST_H_VI_BIU 318 
+#define SRST_P_VI_BIU 319 +#define SRST_D_VICAP 320 +#define SRST_A_VICAP 321 +#define SRST_H_VICAP 322 +#define SRST_ISP0 323 +#define SRST_ISP0_VICAP 324 + +#define SRST_CORE_VPSS 325 +#define SRST_P_CSI_HOST_0 326 +#define SRST_P_CSI_HOST_1 327 +#define SRST_P_CSI_HOST_2 328 +#define SRST_P_CSI_HOST_3 329 +#define SRST_P_CSI_HOST_4 330 + +#define SRST_CIFIN 331 +#define SRST_VICAP_I0CLK 332 +#define SRST_VICAP_I1CLK 333 +#define SRST_VICAP_I2CLK 334 +#define SRST_VICAP_I3CLK 335 +#define SRST_VICAP_I4CLK 336 + +#define SRST_A_VOP_BIU 337 +#define SRST_A_VOP2_BIU 338 +#define SRST_H_VOP_BIU 339 +#define SRST_P_VOP_BIU 340 +#define SRST_H_VOP 341 +#define SRST_A_VOP 342 +#define SRST_D_VP0 343 + +#define SRST_D_VP1 344 +#define SRST_D_VP2 345 +#define SRST_P_VOP2_BIU 346 +#define SRST_P_VOPGRF 347 + +#define SRST_H_VO0_BIU 348 +#define SRST_P_VO0_BIU 349 +#define SRST_A_HDCP0_BIU 350 +#define SRST_P_VO0_GRF 351 +#define SRST_A_HDCP0 352 +#define SRST_H_HDCP0 353 +#define SRST_HDCP0 354 + +#define SRST_P_DSIHOST0 355 +#define SRST_DSIHOST0 356 +#define SRST_P_HDMITX0 357 +#define SRST_HDMITX0_REF 358 +#define SRST_P_EDP0 359 +#define SRST_EDP0_24M 360 + +#define SRST_M_SAI5_8CH 361 +#define SRST_H_SAI5_8CH 362 +#define SRST_M_SAI6_8CH 363 +#define SRST_H_SAI6_8CH 364 +#define SRST_H_SPDIF_TX2 365 +#define SRST_M_SPDIF_TX2 366 +#define SRST_H_SPDIF_RX2 367 +#define SRST_M_SPDIF_RX2 368 + +#define SRST_H_SAI8_8CH 369 +#define SRST_M_SAI8_8CH 370 + +#define SRST_H_VO1_BIU 371 +#define SRST_P_VO1_BIU 372 +#define SRST_M_SAI7_8CH 373 +#define SRST_H_SAI7_8CH 374 +#define SRST_H_SPDIF_TX3 375 +#define SRST_H_SPDIF_TX4 376 +#define SRST_H_SPDIF_TX5 377 +#define SRST_M_SPDIF_TX3 378 + +#define SRST_DP0 379 +#define SRST_P_VO1_GRF 380 +#define SRST_A_HDCP1_BIU 381 +#define SRST_A_HDCP1 382 +#define SRST_H_HDCP1 383 +#define SRST_HDCP1 384 +#define SRST_H_SAI9_8CH 385 +#define SRST_M_SAI9_8CH 386 +#define SRST_M_SPDIF_TX4 387 +#define SRST_M_SPDIF_TX5 388 + +#define SRST_GPU 389 +#define SRST_A_S_GPU_BIU 390 +#define SRST_A_M0_GPU_BIU 391 +#define SRST_P_GPU_BIU 392 +#define SRST_P_GPU_GRF 393 +#define SRST_GPU_PVTPLL 394 +#define SRST_P_PVTPLL_GPU 395 + +#define SRST_A_CENTER_BIU 396 +#define SRST_A_DMA2DDR 397 +#define SRST_A_DDR_SHAREMEM 398 +#define SRST_A_DDR_SHAREMEM_BIU 399 +#define SRST_H_CENTER_BIU 400 +#define SRST_P_CENTER_GRF 401 +#define SRST_P_DMA2DDR 402 +#define SRST_P_SHAREMEM 403 +#define SRST_P_CENTER_BIU 404 + +#define SRST_LINKSYM_HDMITXPHY0 405 + +#define SRST_DP0_PIXELCLK 406 +#define SRST_PHY_DP0_TX 407 +#define SRST_DP1_PIXELCLK 408 +#define SRST_DP2_PIXELCLK 409 + +#define SRST_H_VEPU1_BIU 410 +#define SRST_A_VEPU1_BIU 411 +#define SRST_H_VEPU1 412 +#define SRST_A_VEPU1 413 +#define SRST_VEPU1_CORE 414 + +#define SRST_P_PHPPHY_CRU 415 +#define SRST_P_APB2ASB_SLV_CHIP_TOP 416 +#define SRST_P_PCIE2_COMBOPHY0 417 +#define SRST_P_PCIE2_COMBOPHY0_GRF 418 +#define SRST_P_PCIE2_COMBOPHY1 419 +#define SRST_P_PCIE2_COMBOPHY1_GRF 420 + +#define SRST_PCIE0_PIPE_PHY 421 +#define SRST_PCIE1_PIPE_PHY 422 + +#define SRST_H_CRYPTO_NS 423 +#define SRST_H_TRNG_NS 424 +#define SRST_P_OTPC_NS 425 +#define SRST_OTPC_NS 426 + +#define SRST_P_HDPTX_GRF 427 +#define SRST_P_HDPTX_APB 428 +#define SRST_P_MIPI_DCPHY 429 +#define SRST_P_DCPHY_GRF 430 +#define SRST_P_BOT0_APB2ASB 431 +#define SRST_P_BOT1_APB2ASB 432 +#define SRST_USB2DEBUG 433 +#define SRST_P_CSIPHY_GRF 434 +#define SRST_P_CSIPHY 435 +#define SRST_P_USBPHY_GRF_0 436 +#define SRST_P_USBPHY_GRF_1 437 +#define SRST_P_USBDP_GRF 438 
+#define SRST_P_USBDPPHY 439 +#define SRST_USBDP_COMBO_PHY_INIT 440 + +#define SRST_USBDP_COMBO_PHY_CMN 441 +#define SRST_USBDP_COMBO_PHY_LANE 442 +#define SRST_USBDP_COMBO_PHY_PCS 443 +#define SRST_M_MIPI_DCPHY 444 +#define SRST_S_MIPI_DCPHY 445 +#define SRST_SCAN_CSIPHY 446 +#define SRST_P_VCCIO6_IOC 447 +#define SRST_OTGPHY_0 448 +#define SRST_OTGPHY_1 449 +#define SRST_HDPTX_INIT 450 +#define SRST_HDPTX_CMN 451 +#define SRST_HDPTX_LANE 452 +#define SRST_HDMITXHDP 453 + +#define SRST_MPHY_INIT 454 +#define SRST_P_MPHY_GRF 455 +#define SRST_P_VCCIO7_IOC 456 + +#define SRST_H_PMU1_BIU 457 +#define SRST_P_PMU1_NIU 458 +#define SRST_H_PMU_CM0_BIU 459 +#define SRST_PMU_CM0_CORE 460 +#define SRST_PMU_CM0_JTAG 461 + +#define SRST_P_CRU_PMU1 462 +#define SRST_P_PMU1_GRF 463 +#define SRST_P_PMU1_IOC 464 +#define SRST_P_PMU1WDT 465 +#define SRST_T_PMU1WDT 466 +#define SRST_P_PMUTIMER 467 +#define SRST_PMUTIMER0 468 +#define SRST_PMUTIMER1 469 +#define SRST_P_PMU1PWM 470 +#define SRST_PMU1PWM 471 + +#define SRST_P_I2C0 472 +#define SRST_I2C0 473 +#define SRST_S_UART1 474 +#define SRST_P_UART1 475 +#define SRST_PDM0 476 +#define SRST_H_PDM0 477 + +#define SRST_M_PDM0 478 +#define SRST_H_VAD 479 + +#define SRST_P_PMU0GRF 480 +#define SRST_P_PMU0IOC 481 +#define SRST_P_GPIO0 482 +#define SRST_DB_GPIO0 483 + +#endif -- cgit v1.2.3 From b31c9d9dc343146b9f4ce67b4eee748c49296e99 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Tue, 27 Aug 2024 17:19:29 +0900 Subject: HID: hidraw: add HIDIOCREVOKE ioctl There is a need for userspace applications to open HID devices directly. Use-cases include configuration of gaming mice or direct access to joystick devices. The latter is currently handled by the uaccess tag in systemd, other devices include more custom/local configurations or just sudo. A better approach is what we already have for evdev devices: give the application a file descriptor and revoke it when it may no longer access that device. This patch is the hidraw equivalent to the EVIOCREVOKE ioctl, see commit c7dc65737c9a ("Input: evdev - add EVIOCREVOKE ioctl") for full details. 
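For illustration, a userspace manager could drive the new ioctl roughly as follows. This is a minimal sketch: the device path, the NULL argument convention, and the fallback define are assumptions until the updated uapi header is in use.

#include <stddef.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/hidraw.h>

#ifndef HIDIOCREVOKE
#define HIDIOCREVOKE _IOW('H', 0x0D, int)	/* mirrors the uapi addition below */
#endif

/* Manager side: open the node, hand a dup'd fd to the client, keep this one. */
static int open_and_hand_out(const char *node)
{
	return open(node, O_RDWR);
}

/* Later, when the client must lose access (session switch, policy change). */
static int revoke_access(int client_fd)
{
	return ioctl(client_fd, HIDIOCREVOKE, NULL);
}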
An MR for systemd-logind has been filed here: https://github.com/systemd/systemd/pull/33970 Signed-off-by: Peter Hutterer Link: https://patch.msgid.link/20240827-hidraw-revoke-v5-1-d004a7451aea@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hidraw.h | 1 + include/uapi/linux/hidraw.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hidraw.h b/include/linux/hidraw.h index cd67f4ca5599..18fd30a288de 100644 --- a/include/linux/hidraw.h +++ b/include/linux/hidraw.h @@ -32,6 +32,7 @@ struct hidraw_list { struct hidraw *hidraw; struct list_head node; struct mutex read_mutex; + bool revoked; }; #ifdef CONFIG_HIDRAW diff --git a/include/uapi/linux/hidraw.h b/include/uapi/linux/hidraw.h index 33ebad81720a..d5ee269864e0 100644 --- a/include/uapi/linux/hidraw.h +++ b/include/uapi/linux/hidraw.h @@ -46,6 +46,7 @@ struct hidraw_devinfo { /* The first byte of SOUTPUT and GOUTPUT is the report number */ #define HIDIOCSOUTPUT(len) _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x0B, len) #define HIDIOCGOUTPUT(len) _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x0C, len) +#define HIDIOCREVOKE _IOW('H', 0x0D, int) /* Revoke device access */ #define HIDRAW_FIRST_MINOR 0 #define HIDRAW_MAX_DEVICES 64 -- cgit v1.2.3 From b35243a447b9fe6457fa8e1352152b818436ba5a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Aug 2024 19:37:54 +0200 Subject: block: rework bio splitting The current setup with bio_may_exceed_limit and __bio_split_to_limits is a bit of a mess. Change it so that __bio_split_to_limits does all the work and is just a variant of bio_split_to_limits that returns nr_segs. This is done by inlining it and instead have the various bio_split_* helpers directly submit the potentially split bios. To support btrfs, the rw version has a lower level helper split out that just returns the offset to split. This turns out to nicely clean up the btrfs flow as well. Signed-off-by: Christoph Hellwig Acked-by: David Sterba Reviewed-by: Damien Le Moal Tested-by: Hans Holmberg Reviewed-by: Hans Holmberg Link: https://lore.kernel.org/r/20240826173820.1690925-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index a46e2047bea4..faceadb040f9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -324,8 +324,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); -struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, - unsigned *segs, struct bio_set *bs, unsigned max_bytes); +int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, + unsigned *segs, unsigned max_bytes); /** * bio_next_split - get next @sectors from a bio, splitting if necessary -- cgit v1.2.3 From 379b122a3ec8033aa43cb70e8ecb6fb7f98aa68f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Aug 2024 19:37:55 +0200 Subject: block: constify the lim argument to queue_limits_max_zone_append_sectors queue_limits_max_zone_append_sectors doesn't change the lim argument, so mark it as const. 
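A hypothetical read-only caller then compiles without a cast; this sketch only illustrates the const-correctness, the helper name is the one changed in the hunk below.

#include <linux/blkdev.h>

/* Hypothetical helper: purely read-only inspection of the limits. */
static bool can_zone_append(const struct queue_limits *lim, unsigned int nr_sects)
{
	return nr_sects <= queue_limits_max_zone_append_sectors(lim);
}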
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Tested-by: Hans Holmberg Reviewed-by: Hans Holmberg Link: https://lore.kernel.org/r/20240826173820.1690925-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e85ec73a07d5..ec3ea5d1f99d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1187,7 +1187,8 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } -static inline unsigned int queue_limits_max_zone_append_sectors(struct queue_limits *l) +static inline unsigned int +queue_limits_max_zone_append_sectors(const struct queue_limits *l) { unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors); -- cgit v1.2.3 From 2e1a57d5b0adbb5bd1d85245ec29b6d3cc7edc69 Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Wed, 21 Aug 2024 16:54:52 -0500 Subject: mfd: axp20x: Add ADC, BAT, and USB cells for AXP717 Add support for the AXP717 PMIC to utilize the ADC (for reading voltage, current, and temperature information from the PMIC) as well as the USB charger and battery. Signed-off-by: Chris Morgan Link: https://lore.kernel.org/r/20240821215456.962564-12-macroalpha82@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 8c0a33a2e9ce..9b7ef473adcb 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -115,6 +115,16 @@ enum axp20x_variants { #define AXP313A_IRQ_STATE 0x21 #define AXP717_ON_INDICATE 0x00 +#define AXP717_PMU_STATUS_2 0x01 +#define AXP717_BC_DETECT 0x05 +#define AXP717_PMU_FAULT 0x08 +#define AXP717_MODULE_EN_CONTROL_1 0x0b +#define AXP717_MIN_SYS_V_CONTROL 0x15 +#define AXP717_INPUT_VOL_LIMIT_CTRL 0x16 +#define AXP717_INPUT_CUR_LIMIT_CTRL 0x17 +#define AXP717_MODULE_EN_CONTROL_2 0x19 +#define AXP717_BOOST_CONTROL 0x1e +#define AXP717_VSYS_V_POWEROFF 0x24 #define AXP717_IRQ0_EN 0x40 #define AXP717_IRQ1_EN 0x41 #define AXP717_IRQ2_EN 0x42 @@ -125,6 +135,9 @@ enum axp20x_variants { #define AXP717_IRQ2_STATE 0x4a #define AXP717_IRQ3_STATE 0x4b #define AXP717_IRQ4_STATE 0x4c +#define AXP717_ICC_CHG_SET 0x62 +#define AXP717_ITERM_CHG_SET 0x63 +#define AXP717_CV_CHG_SET 0x64 #define AXP717_DCDC_OUTPUT_CONTROL 0x80 #define AXP717_DCDC1_CONTROL 0x83 #define AXP717_DCDC2_CONTROL 0x84 @@ -145,6 +158,19 @@ enum axp20x_variants { #define AXP717_CLDO3_CONTROL 0x9d #define AXP717_CLDO4_CONTROL 0x9e #define AXP717_CPUSLDO_CONTROL 0x9f +#define AXP717_BATT_PERCENT_DATA 0xa4 +#define AXP717_ADC_CH_EN_CONTROL 0xc0 +#define AXP717_BATT_V_H 0xc4 +#define AXP717_BATT_V_L 0xc5 +#define AXP717_VBUS_V_H 0xc6 +#define AXP717_VBUS_V_L 0xc7 +#define AXP717_VSYS_V_H 0xc8 +#define AXP717_VSYS_V_L 0xc9 +#define AXP717_BATT_CHRG_I_H 0xca +#define AXP717_BATT_CHRG_I_L 0xcb +#define AXP717_ADC_DATA_SEL 0xcd +#define AXP717_ADC_DATA_H 0xce +#define AXP717_ADC_DATA_L 0xcf #define AXP806_STARTUP_SRC 0x00 #define AXP806_CHIP_ID 0x03 -- cgit v1.2.3 From 36b531aace379f45e4ca763f7efb8d0295216e83 Mon Sep 17 00:00:00 2001 From: Jiaqing Zhao Date: Tue, 27 Aug 2024 02:58:20 +0000 Subject: ACPICA: Detect FACS in reduced hardware build According to Section 5.2.10 of ACPI Specification, FACS is optional in reduced hardware model. 
Enable the detection for "Hardware-reduced ACPI support only" build (CONFIG_ACPI_REDUCED_HARDWARE_ONLY=y) also. Link: https://github.com/acpica/acpica/commit/ee53ed6b5452612bb44af542b68d605f8b2b1104 Signed-off-by: Jiaqing Zhao Link: https://patch.msgid.link/20240827025821.2099068-2-jiaqing.zhao@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/acpi/acconfig.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index d768d9c568cf..2da5f4a6e814 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -67,7 +67,6 @@ * General Purpose Events (GPEs) * Global Lock * ACPI PM timer - * FACS table (Waking vectors and Global Lock) */ #ifndef ACPI_REDUCED_HARDWARE #define ACPI_REDUCED_HARDWARE FALSE -- cgit v1.2.3 From 36b5c1dc4b22913f9813a94350e24f6744db38a8 Mon Sep 17 00:00:00 2001 From: Jiaqing Zhao Date: Tue, 27 Aug 2024 02:58:21 +0000 Subject: ACPICA: Allow setting waking vector on reduced hardware platforms Allow setting waking vector in FACS table on reduced hardware platforms to support S3 wakeup. Link: https://github.com/acpica/acpica/commit/ee53ed6b5452612bb44af542b68d605f8b2b1104 Signed-off-by: Jiaqing Zhao Link: https://patch.msgid.link/20240827025821.2099068-3-jiaqing.zhao@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 80dc36f9d527..99bb45a46600 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -881,10 +881,10 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_leave_sleep_state_prep(u8 sleep_state)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_leave_sleep_state(u8 sleep_state)) -ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_set_firmware_waking_vector - (acpi_physical_address physical_address, - acpi_physical_address physical_address64)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status + acpi_set_firmware_waking_vector + (acpi_physical_address physical_address, + acpi_physical_address physical_address64)) /* * ACPI Timer interfaces */ -- cgit v1.2.3 From c0390d541128e8820af8177a572d9d87ff68a3bb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 21:06:58 +0200 Subject: fs: pack struct file Now that we shrunk struct file to 192 bytes aka 3 cachelines reorder struct file to not leave any holes or have members cross cachelines. Add a short comment to each of the fields and mark the cachelines. It's possible that we may have to tweak this based on profiling in the future. So far I had Jens test this comparing io_uring with non-fixed and fixed files and it improved performance. The layout is a combination of Jens' and my changes. Link: https: //lore.kernel.org/r/20240824-peinigen-hocken-7384b977c643@brauner Signed-off-by: Christian Brauner --- include/linux/fs.h | 91 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 095a956aeb29..af8bbd4eeb3a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -987,52 +987,63 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } -/* - * f_{lock,count,pos_lock} members can be highly contended and share - * the same cacheline. f_{lock,mode} are very frequently used together - * and so share the same cacheline as well. 
The read-mostly - * f_{path,inode,op} are kept on a separate cacheline. +/** + * struct file - Represents a file + * @f_count: reference count + * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. + * @f_mode: FMODE_* flags often used in hotpaths + * @f_op: file operations + * @f_mapping: Contents of a cacheable, mappable object. + * @private_data: filesystem or driver specific data + * @f_inode: cached inode + * @f_flags: file flags + * @f_iocb_flags: iocb flags + * @f_cred: stashed credentials of creator/opener + * @f_path: path of the file + * @f_pos_lock: lock protecting file position + * @f_pos: file position + * @f_version: file version + * @f_security: LSM security context of this file + * @f_owner: file owner + * @f_wb_err: writeback error + * @f_sb_err: per sb writeback errors + * @f_ep: link of all epoll hooks for this file + * @f_task_work: task work entry point + * @f_llist: work queue entrypoint + * @f_ra: file's readahead state */ struct file { - union { - /* fput() uses task work when closing and freeing file (default). */ - struct callback_head f_task_work; - /* fput() must use workqueue (most kernel threads). */ - struct llist_node f_llist; - /* Invalid after last fput(). */ - struct file_ra_state f_ra; - }; - /* - * Protects f_ep, f_flags. - * Must not be taken from IRQ context. - */ - spinlock_t f_lock; - fmode_t f_mode; - atomic_long_t f_count; - struct mutex f_pos_lock; - loff_t f_pos; - unsigned int f_flags; - unsigned int f_iocb_flags; - struct fown_struct *f_owner; - const struct cred *f_cred; - struct path f_path; - struct inode *f_inode; /* cached value */ + atomic_long_t f_count; + spinlock_t f_lock; + fmode_t f_mode; const struct file_operations *f_op; - - u64 f_version; + struct address_space *f_mapping; + void *private_data; + struct inode *f_inode; + unsigned int f_flags; + unsigned int f_iocb_flags; + const struct cred *f_cred; + /* --- cacheline 1 boundary (64 bytes) --- */ + struct path f_path; + struct mutex f_pos_lock; + loff_t f_pos; + u64 f_version; + /* --- cacheline 2 boundary (128 bytes) --- */ #ifdef CONFIG_SECURITY - void *f_security; + void *f_security; #endif - /* needed for tty driver, and maybe others */ - void *private_data; - + struct fown_struct *f_owner; + errseq_t f_wb_err; + errseq_t f_sb_err; #ifdef CONFIG_EPOLL - /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct hlist_head *f_ep; -#endif /* #ifdef CONFIG_EPOLL */ - struct address_space *f_mapping; - errseq_t f_wb_err; - errseq_t f_sb_err; /* for syncfs */ + struct hlist_head *f_ep; +#endif + union { + struct callback_head f_task_work; + struct llist_node f_llist; + struct file_ra_state f_ra; + }; + /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ -- cgit v1.2.3 From d345bd2e9834e2da505977e154a1c179c793b7b2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 28 Aug 2024 12:56:24 +0200 Subject: mm: add kmem_cache_create_rcu() When a kmem cache is created with SLAB_TYPESAFE_BY_RCU the free pointer must be located outside of the object because we don't know what part of the memory can safely be overwritten as it may be needed to prevent object recycling. That has the consequence that SLAB_TYPESAFE_BY_RCU may end up adding a new cacheline. This is the case for e.g., struct file. 
After having it shrunk down by 40 bytes and having it fit in three cachelines we still have SLAB_TYPESAFE_BY_RCU adding a fourth cacheline because it needs to accommodate the free pointer. Add a new kmem_cache_create_rcu() function that allows the caller to specify an offset where the free pointer is supposed to be placed. Link: https://lore.kernel.org/r/20240828-work-kmem_cache-rcu-v3-2-5460bc1f09f6@kernel.org Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner --- include/linux/slab.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index eb2bf4629157..5b2da2cf31a8 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -212,6 +212,12 @@ enum _slab_flag_bits { #define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED #endif +/* + * freeptr_t represents a SLUB freelist pointer, which might be encoded + * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. + */ +typedef struct { unsigned long v; } freeptr_t; + /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * @@ -242,6 +248,9 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)); +struct kmem_cache *kmem_cache_create_rcu(const char *name, unsigned int size, + unsigned int freeptr_offset, + slab_flags_t flags); void kmem_cache_destroy(struct kmem_cache *s); int kmem_cache_shrink(struct kmem_cache *s); -- cgit v1.2.3 From ea566e18b4deea6e998088de4f1a76d1f39c8d3f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 28 Aug 2024 12:56:25 +0200 Subject: fs: use kmem_cache_create_rcu() Switch to the new kmem_cache_create_rcu() helper which allows us to use a custom free pointer offset avoiding the need to have an external free pointer which would grow struct file behind our backs. Link: https://lore.kernel.org/r/20240828-work-kmem_cache-rcu-v3-3-5460bc1f09f6@kernel.org Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index af8bbd4eeb3a..58c91a52cad1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1011,6 +1011,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_task_work: task work entry point * @f_llist: work queue entrypoint * @f_ra: file's readahead state + * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) */ struct file { atomic_long_t f_count; @@ -1042,6 +1043,7 @@ struct file { struct callback_head f_task_work; struct llist_node f_llist; struct file_ra_state f_ra; + freeptr_t f_freeptr; }; /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout -- cgit v1.2.3 From ae98dbf43d755b4e111fcd086e53939bef3e9a1a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Aug 2024 11:20:45 -0600 Subject: io_uring/kbuf: add support for incremental buffer consumption By default, any recv/read operation that uses provided buffers will consume at least 1 buffer fully (and maybe more, in case of bundles). This adds support for incremental consumption, meaning that an application may add large buffers, and each read/recv will just consume the part of the buffer that it needs. For example, let's say an application registers 1MB buffers in a provided buffer ring, for streaming receives. 
If it gets a short recv, then the full 1MB buffer will be consumed and passed back to the application. With incremental consumption, only the part that was actually used is consumed, and the buffer remains the current one. This means that both the application and the kernel needs to keep track of what the current receive point is. Each recv will still pass back a buffer ID and the size consumed, the only difference is that before the next receive would always be the next buffer in the ring. Now the same buffer ID may return multiple receives, each at an offset into that buffer from where the previous receive left off. Example: Application registers a provided buffer ring, and adds two 32K buffers to the ring. Buffer1 address: 0x1000000 (buffer ID 0) Buffer2 address: 0x2000000 (buffer ID 1) A recv completion is received with the following values: cqe->res 0x1000 (4k bytes received) cqe->flags 0x11 (CQE_F_BUFFER|CQE_F_BUF_MORE set, buffer ID 0) and the application now knows that 4096b of data is available at 0x1000000, the start of that buffer, and that more data from this buffer will be coming. Now the next receive comes in: cqe->res 0x2010 (8k bytes received) cqe->flags 0x11 (CQE_F_BUFFER|CQE_F_BUF_MORE set, buffer ID 0) which tells the application that 8k is available where the last completion left off, at 0x1001000. Next completion is: cqe->res 0x5000 (20k bytes received) cqe->flags 0x1 (CQE_F_BUFFER set, buffer ID 0) and the application now knows that 20k of data is available at 0x1003000, which is where the previous receive ended. CQE_F_BUF_MORE isn't set, as no more data is available in this buffer ID. The next completion is then: cqe->res 0x1000 (4k bytes received) cqe->flags 0x10001 (CQE_F_BUFFER|CQE_F_BUF_MORE set, buffer ID 1) which tells the application that buffer ID 1 is now the current one, hence there's 4k of valid data at 0x2000000. 0x2001000 will be the next receive point for this buffer ID. When a buffer will be reused by future CQE completions, IORING_CQE_BUF_MORE will be set in cqe->flags. This tells the application that the kernel isn't done with the buffer yet, and that it should expect more completions for this buffer ID. Will only be set by provided buffer rings setup with IOU_PBUF_RING INC, as that's the only type of buffer that will see multiple consecutive completions for the same buffer ID. For any other provided buffer type, any completion that passes back a buffer to the application is final. Once a buffer has been fully consumed, the buffer ring head is incremented and the next receive will indicate the next buffer ID in the CQE cflags. On the send side, the application can manage how much data is sent from an existing buffer by setting sqe->len to the desired send length. An application can request incremental consumption by setting IOU_PBUF_RING_INC in the provided buffer ring registration. Outside of that, any provided buffer ring setup and buffer additions is done like before, no changes there. The only change is in how an application may see multiple completions for the same buffer ID, hence needing to know where the next receive will happen. Note that like existing provided buffer rings, this should not be used with IOSQE_ASYNC, as both really require the ring to remain locked over the duration of the buffer selection and the operation completion. It will consume a buffer otherwise regardless of the size of the IO done. 
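As a rough sketch of the application-side bookkeeping this implies: struct recv_buf and consume() are hypothetical, while the flag and shift constants are the uapi ones added below.

#include <stddef.h>
#include <linux/io_uring.h>

struct recv_buf {
	char *base;
	size_t offset;		/* where the next chunk of data will land */
};

void consume(const void *data, size_t len);	/* application-defined */

static void handle_recv_cqe(const struct io_uring_cqe *cqe, struct recv_buf *bufs)
{
	unsigned int bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
	struct recv_buf *b = &bufs[bid];

	if (cqe->res <= 0 || !(cqe->flags & IORING_CQE_F_BUFFER))
		return;

	consume(b->base + b->offset, cqe->res);	/* data starts where the last recv ended */

	if (cqe->flags & IORING_CQE_F_BUF_MORE)
		b->offset += cqe->res;		/* kernel keeps filling this buffer */
	else
		b->offset = 0;			/* buffer retired; next CQE names a new buffer ID */
}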
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 042eab793e26..a275f91d2ac0 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -440,11 +440,21 @@ struct io_uring_cqe { * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct * them from sends. + * IORING_CQE_F_BUF_MORE If set, the buffer ID set in the completion will get + * more completions. In other words, the buffer is being + * partially consumed, and will be used by the kernel for + * more completions. This is only set for buffers used via + * the incremental buffer consumption, as provided by + * a ring buffer setup with IOU_PBUF_RING_INC. For any + * other provided buffer type, all completions with a + * buffer passed back is automatically returned to the + * application. */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) #define IORING_CQE_F_NOTIF (1U << 3) +#define IORING_CQE_F_BUF_MORE (1U << 4) #define IORING_CQE_BUFFER_SHIFT 16 @@ -716,9 +726,17 @@ struct io_uring_buf_ring { * mmap(2) with the offset set as: * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) * to get a virtual mapping for the ring. + * IOU_PBUF_RING_INC: If set, buffers consumed from this buffer ring can be + * consumed incrementally. Normally one (or more) buffers + * are fully consumed. With incremental consumptions, it's + * feasible to register big ranges of buffers, and each + * use of it will consume only as much as it needs. This + * requires that both the kernel and application keep + * track of where the current read/recv index is at. */ enum io_uring_register_pbuf_ring_flags { IOU_PBUF_RING_MMAP = 1, + IOU_PBUF_RING_INC = 2, }; /* argument for IORING_(UN)REGISTER_PBUF_RING */ -- cgit v1.2.3 From 7afea7bc49c5482ac11fc4488230827edc51e28a Mon Sep 17 00:00:00 2001 From: Adrien Destugues Date: Tue, 12 Dec 2023 12:28:17 +0100 Subject: ACPICA: haiku: Fix invalid value used for semaphores ACPICA commit 49fe4f25483feec2f685b204ef19e28d92979e95 In Haiku, semaphores are represented by integers, not pointers. So, we can't use NULL as the invalid/destroyed value, the correct value is -1. Introduce a platform overridable define to allow this. Fixes #162 (which was closed after coming to the conclusion that this should be done, but the change was never done). Link: https://github.com/acpica/acpica/commit/49fe4f25 Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/acenv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index 337ffa931ee8..3f31df09a9d6 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -252,6 +252,12 @@ #define ACPI_RELEASE_GLOBAL_LOCK(Glptr, pending) pending = 0 #endif +/* NULL/invalid value to use for destroyed or not-yet-created semaphores. */ + +#ifndef ACPI_SEMAPHORE_NULL +#define ACPI_SEMAPHORE_NULL NULL +#endif + /* Flush CPU cache - used when going to sleep. Wbinvd or similar. 
*/ #ifndef ACPI_FLUSH_CPU_CACHE -- cgit v1.2.3 From ff418f34ba44d8ff6cea953dd79546f3e4c6c807 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 20 Mar 2024 13:15:08 +0800 Subject: ACPICA: Complete CXL 3.0 CXIMS structures ACPICA commit eb2a2ff303416fb3f6c425d519dbcd6988dbd91f Commit 2d8dc0383d3c9 ("Add CXL 3.0 structures (CXIMS & RDPAS) to the CEDT table") introduces basic support for CXL XOR Interleave Math Structure (CXIMS). Complete the CXIMS structures. No functional change. Link: https://github.com/acpica/acpica/commit/eb2a2ff3 Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 841ef9f22795..8cfcd1e1c177 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -567,6 +567,10 @@ struct acpi_cedt_cxims { u64 xormap_list[]; }; +struct acpi_cedt_cxims_target_element { + u64 xormap; +}; + /* 3: CXL RCEC Downstream Port Association Structure */ struct acpi_cedt_rdpas { -- cgit v1.2.3 From 7741e3c5f848e867c58c6e612c63a27c1681a493 Mon Sep 17 00:00:00 2001 From: Sia Jee Heng Date: Wed, 28 Feb 2024 22:39:50 -0800 Subject: ACPICA: SPCR: Update the SPCR table to version 4 ACPICA commit 1eeff52124a45d5cd887ba5687bbad0116e4d211 The Microsoft Serial Port Console Redirection (SPCR) specification revision 1.09 comprises additional fields [1]. The newly added fields are: - RISC-V SBI - Precise Baud Rate - namespace_string_length - namespace_string_offset - namespace_string Additionaly, this code will support up to SPCR revision 1.10, as it includes only minor wording changes. Link: https://learn.microsoft.com/en-us/windows-hardware/drivers/serports/serial-port-console-redirection-table # [1] Link: https://github.com/acpica/acpica/commit/1eeff521 Signed-off-by: Sia Jee Heng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl3.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h index 8f775e3a08fd..5cd755143b7d 100644 --- a/include/acpi/actbl3.h +++ b/include/acpi/actbl3.h @@ -92,10 +92,10 @@ struct acpi_table_slit { /******************************************************************************* * * SPCR - Serial Port Console Redirection table - * Version 2 + * Version 4 * * Conforms to "Serial Port Console Redirection Table", - * Version 1.03, August 10, 2015 + * Version 1.10, Jan 5, 2023 * ******************************************************************************/ @@ -112,7 +112,7 @@ struct acpi_table_spcr { u8 stop_bits; u8 flow_control; u8 terminal_type; - u8 reserved1; + u8 language; u16 pci_device_id; u16 pci_vendor_id; u8 pci_bus; @@ -120,7 +120,11 @@ struct acpi_table_spcr { u8 pci_function; u32 pci_flags; u8 pci_segment; - u32 reserved2; + u32 uart_clk_freq; + u32 precise_baudrate; + u16 name_space_string_length; + u16 name_space_string_offset; + char name_space_string[]; }; /* Masks for pci_flags field above */ -- cgit v1.2.3 From 4aca2bef90bd12969037468f71568fbef994a254 Mon Sep 17 00:00:00 2001 From: Sia Jee Heng Date: Wed, 28 Feb 2024 22:41:55 -0800 Subject: ACPICA: Headers: Add RISC-V SBI Subtype to DBG2 ACPICA commit 6f4c900bcf9ca065129353f17a83773aa58095aa Include the RISC-V SBI debugging subtype as documented in DBG2 dated April 10, 2023 [1]. 
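A console setup path matching the new subtype might test it along these lines; this is only a sketch, and ACPI_DBG2_SERIAL_PORT is assumed from the existing DBG2 definitions.

#include <linux/acpi.h>

static bool is_riscv_sbi_dbg2(const struct acpi_dbg2_device *dev)
{
	return dev->port_type == ACPI_DBG2_SERIAL_PORT &&
	       dev->port_subtype == ACPI_DBG2_RISCV_SBI_CON;
}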
Link: https://learn.microsoft.com/en-us/windows-hardware/drivers/bringup/acpi-debug-port-table # [1] Link: https://github.com/acpica/acpica/commit/6f4c900b Signed-off-by: Sia Jee Heng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 8cfcd1e1c177..89f0df489dc3 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -755,6 +755,7 @@ struct acpi_dbg2_device { #define ACPI_DBG2_16550_WITH_GAS 0x0012 #define ACPI_DBG2_SDM845_7_372MHZ 0x0013 #define ACPI_DBG2_INTEL_LPSS 0x0014 +#define ACPI_DBG2_RISCV_SBI_CON 0x0015 #define ACPI_DBG2_1394_STANDARD 0x0000 -- cgit v1.2.3 From cf94e10a037c337559bf6c5486cc1abdf4900a08 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Wed, 7 Feb 2024 19:54:20 +0000 Subject: ACPICA: MPAM: Correct the typo in struct acpi_mpam_msc_node member ACPICA commit 3da3f7d776d17e9bfbb15de88317de8d7397ce38 A member of the struct acpi_mpam_msc_node that represents a Memory System Controller node structure - num_resource_nodes has a typo. Fix the typo No functional change. Link: https://github.com/acpica/acpica/commit/3da3f7d7 Signed-off-by: Punit Agrawal Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index e27958ef8264..d3858eebc255 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1607,7 +1607,7 @@ struct acpi_mpam_msc_node { u32 max_nrdy_usec; u64 hardware_id_linked_device; u32 instance_id_linked_device; - u32 num_resouce_nodes; + u32 num_resource_nodes; }; struct acpi_table_mpam { -- cgit v1.2.3 From 632b746b108e3c62e0795072d00ed597371c738a Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Tue, 2 Apr 2024 21:09:45 -0700 Subject: ACPICA: Implement ACPI_WARNING_ONCE and ACPI_ERROR_ONCE ACPICA commit 2ad4e6e7c4118f4cdfcad321c930b836cec77406 In some cases it is not practical nor useful to nag user about some firmware errors that they cannot fix. Add a macro that will print a warning or error only once to be used in these cases. Link: https://github.com/acpica/acpica/commit/2ad4e6e7 Signed-off-by: Vasily Khoruzhick Signed-off-by: Rafael J. Wysocki --- include/acpi/acoutput.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index b1571dd96310..5e0346142f98 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -193,6 +193,7 @@ */ #ifndef ACPI_NO_ERROR_MESSAGES #define AE_INFO _acpi_module_name, __LINE__ +#define ACPI_ONCE(_fn, _plist) { static char _done; if (!_done) { _done = 1; _fn _plist; } } /* * Error reporting. 
Callers module and line number are inserted by AE_INFO, @@ -201,8 +202,10 @@ */ #define ACPI_INFO(plist) acpi_info plist #define ACPI_WARNING(plist) acpi_warning plist +#define ACPI_WARNING_ONCE(plist) ACPI_ONCE(acpi_warning, plist) #define ACPI_EXCEPTION(plist) acpi_exception plist #define ACPI_ERROR(plist) acpi_error plist +#define ACPI_ERROR_ONCE(plist) ACPI_ONCE(acpi_error, plist) #define ACPI_BIOS_WARNING(plist) acpi_bios_warning plist #define ACPI_BIOS_EXCEPTION(plist) acpi_bios_exception plist #define ACPI_BIOS_ERROR(plist) acpi_bios_error plist @@ -214,8 +217,10 @@ #define ACPI_INFO(plist) #define ACPI_WARNING(plist) +#define ACPI_WARNING_ONCE(plist) #define ACPI_EXCEPTION(plist) #define ACPI_ERROR(plist) +#define ACPI_ERROR_ONCE(plist) #define ACPI_BIOS_WARNING(plist) #define ACPI_BIOS_EXCEPTION(plist) #define ACPI_BIOS_ERROR(plist) -- cgit v1.2.3 From f91f2a9879cc77db1f45f690f38f42698580416e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 28 Aug 2024 16:34:00 -0700 Subject: dmaengine: idxd: Add a new DSA device ID for Granite Rapids-D platform A new DSA device ID, 0x11fb, is introduced for the Granite Rapids-D platform. Add the device ID to the IDXD driver. Since a potential security issue has been fixed on the new device, it's secure to assign the device to virtual machines, and therefore, the new device ID will not be added to the VFIO denylist. Additionally, the new device ID may be useful in identifying and addressing any other potential issues with this specific device in the future. The same is also applied to any other new DSA/IAA devices with new device IDs. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240828233401.186007-2-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e388c8b1cbc2..ff99047dac44 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2706,6 +2706,7 @@ #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_SST_TNG 0x119a +#define PCI_DEVICE_ID_INTEL_DSA_GNRD 0x11fb #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e -- cgit v1.2.3 From 4fecf944c051ea852e98c062636bf5b4c7f5f8a7 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 28 Aug 2024 16:34:01 -0700 Subject: dmaengine: idxd: Add new DSA and IAA device IDs for Diamond Rapids platform A new DSA device ID, 0x1212, and a new IAA device ID, 0x1216, are introduced for Diamond Rapids platform. Add the device IDs to the IDXD driver. The name "IAA" is used in new code instead of the old name "IAX". However, the "IAX" naming (e.g., IDXD_TYPE_IAX) is retained for legacy code compatibility. 
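For illustration, the new IDs would typically be consumed through the driver's PCI match table along these lines; this is only a sketch, the real idxd table lives under drivers/dma/idxd and carries per-device driver data.

#include <linux/pci.h>

static const struct pci_device_id idxd_ids_sketch[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DSA_GNRD) },
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DSA_DMR) },
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IAA_DMR) },
	{ }
};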
Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240828233401.186007-3-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ff99047dac44..8139231d0e86 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2707,6 +2707,8 @@ #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_SST_TNG 0x119a #define PCI_DEVICE_ID_INTEL_DSA_GNRD 0x11fb +#define PCI_DEVICE_ID_INTEL_DSA_DMR 0x1212 +#define PCI_DEVICE_ID_INTEL_IAA_DMR 0x1216 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e -- cgit v1.2.3 From dd067afe3f8cbe548fb8ac76eaf6e9adfea013b2 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 24 Apr 2024 10:02:31 +0200 Subject: ACPICA: Add support for Windows 11 22H2 _OSI string ACPICA commit f56218c4e4dc1d1f699662d0726ad9e7a0d58548 See: https://github.com/microsoft_docs/windows-driver-docs/commit/be9d1c211adf8fabe5a43de71c034b1b6d7372de Link: https://github.com/acpica/acpica/commit/f56218c4 Signed-off-by: Armin Wolf Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 85c2dcf2b704..80767e8bf3ad 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -1311,6 +1311,7 @@ typedef enum { #define ACPI_OSI_WIN_10_19H1 0x14 #define ACPI_OSI_WIN_10_20H1 0x15 #define ACPI_OSI_WIN_11 0x16 +#define ACPI_OSI_WIN_11_22H2 0x17 /* Definitions of getopt */ -- cgit v1.2.3 From 6143f9616627ddbfdf827795745fa4d9db166bc2 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 8 Aug 2024 16:18:01 -0700 Subject: ACPICA: HMAT: Add extended linear address mode to MSCIS ACPICA commit aaa08569b81aa4d9ff59f91f00e589e98d499e6c Redefine the 2 reserved bytes at offset 28 of Memory Side Cache Information Structure as "Address Mode" and add defines of the new value. * 0 - Reserved (Unkown Address Mode) * 1 - Extended-linear (N direct-map aliases linearly mapped) * 2..65535 - Reserved (Unknown Address Mode) Link: https://github.com/acpica/acpica/commit/aaa08569 Signed-off-by: Dave Jiang Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 89f0df489dc3..199afc2cd122 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1796,7 +1796,7 @@ struct acpi_hmat_cache { u32 reserved1; u64 cache_size; u32 cache_attributes; - u16 reserved2; + u16 address_mode; u16 number_of_SMBIOShandles; }; @@ -1808,6 +1808,9 @@ struct acpi_hmat_cache { #define ACPI_HMAT_WRITE_POLICY (0x0000F000) #define ACPI_HMAT_CACHE_LINE_SIZE (0xFFFF0000) +#define ACPI_HMAT_CACHE_MODE_UNKNOWN (0) +#define ACPI_HMAT_CACHE_MODE_EXTENDED_LINEAR (1) + /* Values for cache associativity flag */ #define ACPI_HMAT_CA_NONE (0) -- cgit v1.2.3 From 9af32b4a25f59e3a2423a5f87e738a29487242ce Mon Sep 17 00:00:00 2001 From: Saket Dumbre Date: Tue, 27 Aug 2024 11:24:10 -0700 Subject: ACPICA: Setup for ACPICA release 20240827 ACPICA commit 86c762afe5bf915e8101a0455513368e2a60dd80 Link: https://github.com/acpica/acpica/commit/86c762af Signed-off-by: Saket Dumbre Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 99bb45a46600..6c1cd90efae8 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20240322 +#define ACPI_CA_VERSION 0x20240827 #include #include -- cgit v1.2.3 From 6f606ffd6dd7583d8194ee3d858ba4da2eff26a3 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 29 Aug 2024 14:08:23 -0700 Subject: bpf: Move insn_buf[16] to bpf_verifier_env This patch moves the 'struct bpf_insn insn_buf[16]' stack usage to the bpf_verifier_env. A '#define INSN_BUF_SIZE 16' is also added to replace the ARRAY_SIZE(insn_buf) usages. Both convert_ctx_accesses() and do_misc_fixup() are changed to use the env->insn_buf. It is a refactoring work for adding the epilogue_buf[16] in a later patch. With this patch, the stack size usage decreased. Before: ./kernel/bpf/verifier.c:22133:5: warning: stack frame size (2584) After: ./kernel/bpf/verifier.c:22184:5: warning: stack frame size (2264) Reviewed-by: Eduard Zingerman Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240829210833.388152-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 279b4a640644..0ad2d189c546 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -23,6 +23,8 @@ * (in the "-8,-16,...,-512" form) */ #define TMP_STR_BUF_LEN 320 +/* Patch buffer size */ +#define INSN_BUF_SIZE 16 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that @@ -780,6 +782,7 @@ struct bpf_verifier_env { * e.g., in reg_type_str() to generate reg_type string */ char tmp_str_buf[TMP_STR_BUF_LEN]; + struct bpf_insn insn_buf[INSN_BUF_SIZE]; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) -- cgit v1.2.3 From 169c31761c8d7f606f3ee628829c27998626c4f0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 29 Aug 2024 14:08:25 -0700 Subject: bpf: Add gen_epilogue to bpf_verifier_ops This patch adds a .gen_epilogue to the bpf_verifier_ops. It is similar to the existing .gen_prologue. Instead of allowing a subsystem to run code at the beginning of a bpf prog, it allows the subsystem to run code just before the bpf prog exit. One of the use case is to allow the upcoming bpf qdisc to ensure that the skb->dev is the same as the qdisc->dev_queue->dev. The bpf qdisc struct_ops implementation could either fix it up or drop the skb. Another use case could be in bpf_tcp_ca.c to enforce snd_cwnd has sane value (e.g. non zero). The epilogue can do the useful thing (like checking skb->dev) if it can access the bpf prog's ctx. Unlike prologue, r1 may not hold the ctx pointer. This patch saves the r1 in the stack if the .gen_epilogue has returned some instructions in the "epilogue_buf". The existing .gen_prologue is done in convert_ctx_accesses(). The new .gen_epilogue is done in the convert_ctx_accesses() also. When it sees the (BPF_JMP | BPF_EXIT) instruction, it will be patched with the earlier generated "epilogue_buf". The epilogue patching is only done for the main prog. Only one epilogue will be patched to the main program. 
When the bpf prog has multiple BPF_EXIT instructions, a BPF_JA is used to goto the earlier patched epilogue. Majority of the archs support (BPF_JMP32 | BPF_JA): x86, arm, s390, risv64, loongarch, powerpc and arc. This patch keeps it simple and always use (BPF_JMP32 | BPF_JA). A new macro BPF_JMP32_A is added to generate the (BPF_JMP32 | BPF_JA) insn. Acked-by: Eduard Zingerman Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240829210833.388152-4-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ include/linux/bpf_verifier.h | 1 + include/linux/filter.h | 10 ++++++++++ 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dc63083f76b7..6f87fb014fba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -974,6 +974,8 @@ struct bpf_verifier_ops { struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); + int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog, + s16 ctx_stack_off); int (*gen_ld_abs)(const struct bpf_insn *orig, struct bpf_insn *insn_buf); u32 (*convert_ctx_access)(enum bpf_access_type type, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0ad2d189c546..2e20207315a9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -783,6 +783,7 @@ struct bpf_verifier_env { */ char tmp_str_buf[TMP_STR_BUF_LEN]; struct bpf_insn insn_buf[INSN_BUF_SIZE]; + struct bpf_insn epilogue_buf[INSN_BUF_SIZE]; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) diff --git a/include/linux/filter.h b/include/linux/filter.h index b6672ff61407..99b6fc83825b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -437,6 +437,16 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) +/* Unconditional jumps, gotol pc + imm32 */ + +#define BPF_JMP32_A(IMM) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* Relative call */ #define BPF_CALL_REL(TGT) \ -- cgit v1.2.3 From 087adb4f0f91ee330446a70af899e6a996e5cc13 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 6 Aug 2024 19:28:46 +0200 Subject: vfs: dodge smp_mb in break_lease and break_deleg in the common case These inlines show up in the fast path (e.g., in do_dentry_open()) and induce said full barrier regarding i_flctx access when in most cases the pointer is NULL. The pointer can be safely checked before issuing the barrier, dodging it in most cases as a result. It is plausible the consume fence would be sufficient, but I don't want to go audit all callers regarding what they before calling here. 
Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240806172846.886570-1-mjguzik@gmail.com Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/filelock.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/filelock.h b/include/linux/filelock.h index daee999d05f3..bb44224c6676 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -420,28 +420,38 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) #ifdef CONFIG_FILE_LOCKING static inline int break_lease(struct inode *inode, unsigned int mode) { + struct file_lock_context *flctx; + /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. */ + flctx = READ_ONCE(inode->i_flctx); + if (!flctx) + return 0; smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + if (!list_empty_careful(&flctx->flc_lease)) return __break_lease(inode, mode, FL_LEASE); return 0; } static inline int break_deleg(struct inode *inode, unsigned int mode) { + struct file_lock_context *flctx; + /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. */ + flctx = READ_ONCE(inode->i_flctx); + if (!flctx) + return 0; smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + if (!list_empty_careful(&flctx->flc_lease)) return __break_lease(inode, mode, FL_DELEG); return 0; } -- cgit v1.2.3 From 8447d848e1dc6d42d1dcd00f133d7715fc732c47 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 10 Aug 2024 08:47:53 +0200 Subject: vfs: only read fops once in fops_get/put In do_dentry_open() the usage is: f->f_op = fops_get(inode->i_fop); In generated asm the compiler emits 2 reads from inode->i_fop instead of just one. This popped up due to false-sharing where loads from that offset end up bouncing a cacheline during parallel open. While this is going to be fixed, the spurious load does not need to be there. This makes do_dentry_open() go down from 1177 to 1154 bytes. fops_put() is patched to maintain some consistency. No functional changes. Reviewed-by: Josef Bacik Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240810064753.1211441-1-mjguzik@gmail.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c0286d479c9d..696c620f90e0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2553,10 +2553,17 @@ struct super_block *sget(struct file_system_type *type, struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ -#define fops_get(fops) \ - (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) -#define fops_put(fops) \ - do { if (fops) module_put((fops)->owner); } while(0) +#define fops_get(fops) ({ \ + const struct file_operations *_fops = (fops); \ + (((_fops) && try_module_get((_fops)->owner) ? (_fops) : NULL)); \ +}) + +#define fops_put(fops) ({ \ + const struct file_operations *_fops = (fops); \ + if (_fops) \ + module_put((_fops)->owner); \ +}) + /* * This one is to be used *ONLY* from ->open() instances. 
* fops must be non-NULL, pinned down *and* module dependencies -- cgit v1.2.3 From 641bb4394f405cba498b100b44541ffc0aed5be1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 9 Aug 2024 12:38:56 +0200 Subject: fs: move FMODE_UNSIGNED_OFFSET to fop_flags This is another flag that is statically set and doesn't need to use up an FMODE_* bit. Move it to ->fop_flags and free up another FMODE_* bit. (1) mem_open() used from proc_mem_operations (2) adi_open() used from adi_fops (3) drm_open_helper(): (3.1) accel_open() used from DRM_ACCEL_FOPS (3.2) drm_open() used from (3.2.1) amdgpu_driver_kms_fops (3.2.2) psb_gem_fops (3.2.3) i915_driver_fops (3.2.4) nouveau_driver_fops (3.2.5) panthor_drm_driver_fops (3.2.6) radeon_driver_kms_fops (3.2.7) tegra_drm_fops (3.2.8) vmwgfx_driver_fops (3.2.9) xe_driver_fops (3.2.10) DRM_GEM_FOPS (3.2.11) DEFINE_DRM_GEM_DMA_FOPS (4) struct memdev sets fmode flags based on type of device opened. For devices using struct mem_fops unsigned offset is used. Mark all these file operations as FOP_UNSIGNED_OFFSET and add asserts into the open helper to ensure that the flag is always set. Link: https://lore.kernel.org/r/20240809-work-fop_unsigned-v1-1-658e054d893e@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/drm/drm_accel.h | 3 ++- include/drm/drm_gem.h | 3 ++- include/drm/drm_gem_dma_helper.h | 1 + include/linux/fs.h | 5 +++-- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_accel.h b/include/drm/drm_accel.h index f4d3784b1dce..41c78b7d712c 100644 --- a/include/drm/drm_accel.h +++ b/include/drm/drm_accel.h @@ -28,7 +28,8 @@ .poll = drm_poll,\ .read = drm_read,\ .llseek = noop_llseek, \ - .mmap = drm_gem_mmap + .mmap = drm_gem_mmap, \ + .fop_flags = FOP_UNSIGNED_OFFSET /** * DEFINE_DRM_ACCEL_FOPS() - macro to generate file operations for accelerators drivers diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index bae4865b2101..d8b86df2ec0d 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -447,7 +447,8 @@ struct drm_gem_object { .poll = drm_poll,\ .read = drm_read,\ .llseek = noop_llseek,\ - .mmap = drm_gem_mmap + .mmap = drm_gem_mmap, \ + .fop_flags = FOP_UNSIGNED_OFFSET /** * DEFINE_DRM_GEM_FOPS() - macro to generate file operations for GEM drivers diff --git a/include/drm/drm_gem_dma_helper.h b/include/drm/drm_gem_dma_helper.h index a827bde494f6..f2678e7ecb98 100644 --- a/include/drm/drm_gem_dma_helper.h +++ b/include/drm/drm_gem_dma_helper.h @@ -267,6 +267,7 @@ unsigned long drm_gem_dma_get_unmapped_area(struct file *filp, .read = drm_read,\ .llseek = noop_llseek,\ .mmap = drm_gem_mmap,\ + .fop_flags = FOP_UNSIGNED_OFFSET, \ DRM_GEM_DMA_UNMAPPED_AREA_FOPS \ } diff --git a/include/linux/fs.h b/include/linux/fs.h index 696c620f90e0..0145bda465ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -146,8 +146,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)(1 << 12)) -/* File is huge (eg. 
/dev/mem): treat loff_t as unsigned */ -#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13)) +/* FMODE_* bit 13 */ /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)(1 << 14)) @@ -2073,6 +2072,8 @@ struct file_operations { #define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) /* Contains huge pages */ #define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) +/* Treat loff_t as unsigned (e.g., /dev/mem) */ +#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, -- cgit v1.2.3 From 433f9d76a01056dfeaefc15167b11e514e56f956 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 14 Aug 2024 17:02:31 +0800 Subject: autofs: add per dentry expire timeout Add ability to set per-dentry mount expire timeout to autofs. There are two fairly well known automounter map formats, the autofs format and the amd format (more or less System V and Berkley). Some time ago Linux autofs added an amd map format parser that implemented a fair amount of the amd functionality. This was done within the autofs infrastructure and some functionality wasn't implemented because it either didn't make sense or required extra kernel changes. The idea was to restrict changes to be within the existing autofs functionality as much as possible and leave changes with a wider scope to be considered later. One of these changes is implementing the amd options: 1) "unmount", expire this mount according to a timeout (same as the current autofs default). 2) "nounmount", don't expire this mount (same as setting the autofs timeout to 0 except only for this specific mount) . 3) "utimeout=", expire this mount using the specified timeout (again same as setting the autofs timeout but only for this mount). To implement these options per-dentry expire timeouts need to be implemented for autofs indirect mounts. This is because all map keys (mounts) for autofs indirect mounts use an expire timeout stored in the autofs mount super block info. structure and all indirect mounts use the same expire timeout. Now I have a request to add the "nounmount" option so I need to add the per-dentry expire handling to the kernel implementation to do this. The implementation uses the trailing path component to identify the mount (and is also used as the autofs map key) which is passed in the autofs_dev_ioctl structure path field. The expire timeout is passed in autofs_dev_ioctl timeout field (well, of the timeout union). If the passed in timeout is equal to -1 the per-dentry timeout and flag are cleared providing for the "unmount" option. If the timeout is greater than or equal to 0 the timeout is set to the value and the flag is also set. If the dentry timeout is 0 the dentry will not expire by timeout which enables the implementation of the "nounmount" option for the specific mount. When the dentry timeout is greater than zero it allows for the implementation of the "utimeout=" option. 
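A userspace sketch of setting (or clearing) the per-mount timeout through the autofs device ioctl, following the field usage described above; the command name and init helper are assumed from the uapi auto_dev-ioctl.h header, so treat the details as assumptions.

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/auto_dev-ioctl.h>

/* devfd is an open fd on /dev/autofs, ioctlfd identifies the indirect mount,
 * key is the trailing path component (the map key).  Timeout semantics as
 * described above: -1 clears, 0 never expires, > 0 sets the expire timeout. */
static int set_key_timeout(int devfd, int ioctlfd, const char *key, long long timeout)
{
	size_t sz = sizeof(struct autofs_dev_ioctl) + strlen(key) + 1;
	struct autofs_dev_ioctl *param = calloc(1, sz);
	int ret;

	if (!param)
		return -1;
	init_autofs_dev_ioctl(param);
	param->size = sz;
	param->ioctlfd = ioctlfd;
	param->timeout.timeout = timeout;
	strcpy(param->path, key);
	ret = ioctl(devfd, AUTOFS_DEV_IOCTL_TIMEOUT, param);
	free(param);
	return ret;
}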
Signed-off-by: Ian Kent Link: https://lore.kernel.org/r/20240814090231.963520-1-raven@themaw.net Signed-off-by: Christian Brauner --- include/uapi/linux/auto_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 1f7925afad2d..8081df849743 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 5 +#define AUTOFS_PROTO_SUBVERSION 6 /* * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed -- cgit v1.2.3 From 1c48d441468c425e878d81c5c3e7ff8a9a594cc0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 16 Aug 2024 16:35:52 +0200 Subject: inode: remove __I_DIO_WAKEUP Afaict, we can just rely on inode->i_dio_count for waiting instead of this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio and xfstests dio tests. Link: https://lore.kernel.org/r/20240816-vfs-misc-dio-v1-1-80fe21a2c710@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0145bda465ff..8f8d274929d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2373,8 +2373,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_REFERENCED Marks the inode as recently references on the LRU list. * - * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). - * * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to * synchronize competing switching instances and to tell * wb stat updates to grab the i_pages lock. See @@ -2409,8 +2407,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define __I_SYNC 7 #define I_SYNC (1 << __I_SYNC) #define I_REFERENCED (1 << 8) -#define __I_DIO_WAKEUP 9 -#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) #define I_WB_SWITCH (1 << 13) @@ -3227,7 +3223,9 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, } #endif +bool inode_dio_finished(const struct inode *inode); void inode_dio_wait(struct inode *inode); +void inode_dio_wait_interruptible(struct inode *inode); /** * inode_dio_begin - signal start of a direct I/O requests @@ -3251,7 +3249,7 @@ static inline void inode_dio_begin(struct inode *inode) static inline void inode_dio_end(struct inode *inode) { if (atomic_dec_and_test(&inode->i_dio_count)) - wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); + wake_up_var(&inode->i_dio_count); } extern void inode_set_flags(struct inode *inode, unsigned int flags, -- cgit v1.2.3 From 029c3f27fe84c5fd29d49a99cc5176433bf12209 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 22 Aug 2024 11:28:38 +0200 Subject: fs: remove unused path_put_init() This helper has been unused for a while now. 
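To make the __I_DIO_WAKEUP removal above concrete: with the flag gone, waiters key on the value of i_dio_count itself and pair with the wake_up_var() call left in inode_dio_end(). A minimal sketch of the waiter side, matching the inode_dio_finished()/inode_dio_wait() declarations in that hunk but not claiming to be the exact fs/inode.c code:

#include <linux/fs.h>
#include <linux/wait_bit.h>     /* wait_var_event() */

static bool example_inode_dio_finished(const struct inode *inode)
{
        return atomic_read(&inode->i_dio_count) == 0;
}

static void example_inode_dio_wait(struct inode *inode)
{
        /* Sleep until inode_dio_end() drops i_dio_count to zero and wakes
         * the variable's address via wake_up_var(). */
        wait_var_event(&inode->i_dio_count,
                       example_inode_dio_finished(inode));
}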
Link: https://lore.kernel.org/r/20240822-bewuchs-werktag-46672b3c0606@brauner Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/path.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/path.h b/include/linux/path.h index ca073e70decd..7ea389dc764b 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -18,12 +18,6 @@ static inline int path_equal(const struct path *path1, const struct path *path2) return path1->mnt == path2->mnt && path1->dentry == path2->dentry; } -static inline void path_put_init(struct path *path) -{ - path_put(path); - *path = (struct path) { }; -} - /* * Cleanup macro for use with __free(path_put). Avoids dereference and * copying @path unlike DEFINE_FREE(). path_put() will handle the empty -- cgit v1.2.3 From 71ff58ce3428b2471efae5b00594e892b285c97c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 22 Aug 2024 11:30:58 +0200 Subject: fs: s/__u32/u32/ for s_fsnotify_mask The underscore variants are for uapi whereas the non-underscore variants are for in-kernel consumers. Link: https://lore.kernel.org/r/20240822-anwerben-nutzung-1cd6c82a565f@brauner Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8f8d274929d7..cda1f2545ea0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1266,7 +1266,7 @@ struct super_block { time64_t s_time_min; time64_t s_time_max; #ifdef CONFIG_FSNOTIFY - __u32 s_fsnotify_mask; + u32 s_fsnotify_mask; struct fsnotify_sb_info *s_fsnotify_info; #endif -- cgit v1.2.3 From da18ecbf0fb6fb73e6f9273e7aa15610f148ce17 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 14:47:35 +0200 Subject: fs: add i_state helpers The i_state member is an unsigned long so that it can be used with the wait bit infrastructure which expects unsigned long. This wastes 4 bytes which we're unlikely to ever use. Switch to using the var event wait mechanism using the address of the bit. Thanks to Linus for the address idea. Link: https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index cda1f2545ea0..d35de348d2ec 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -744,6 +744,21 @@ struct inode { void *i_private; /* fs or device private pointer */ } __randomize_layout; +/* + * Get bit address from inode->i_state to use with wait_var_event() + * infrastructre. + */ +#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit)) + +struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe, + struct inode *inode, u32 bit); + +static inline void inode_wake_up_bit(struct inode *inode, u32 bit) +{ + /* Caller is responsible for correct memory barriers. */ + wake_up_var(inode_state_wait_address(inode, bit)); +} + struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode); static inline unsigned int i_blocksize(const struct inode *node) -- cgit v1.2.3 From 2ed634c96ed1d6e4ee0497be176f9980866e81cb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 14:47:36 +0200 Subject: fs: reorder i_state bits so that we can use the first bits to derive unique addresses from i_state. 
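The i_state helpers above are meant to be used in pairs: wait on the address derived from a bit, wake that same address after updating i_state. A sketch of the pattern for I_SYNC (illustrative only; memory ordering is deliberately left to the caller, as the comment in the hunk says, and real users such as writeback handle it under inode->i_lock):

#include <linux/fs.h>
#include <linux/wait_bit.h>

/* Waiter: sleep until I_SYNC clears, keyed on the per-bit address. */
static void example_wait_for_sync(struct inode *inode)
{
        wait_var_event(inode_state_wait_address(inode, __I_SYNC),
                       !(READ_ONCE(inode->i_state) & I_SYNC));
}

/* Waker: clear the bit, then wake the same derived address. */
static void example_finish_sync(struct inode *inode)
{
        spin_lock(&inode->i_lock);
        inode->i_state &= ~I_SYNC;
        inode_wake_up_bit(inode, __I_SYNC);     /* barriers are the caller's job */
        spin_unlock(&inode->i_lock);
}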
Link: https://lore.kernel.org/r/20240823-work-i_state-v3-2-5cd5fd207a57@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d35de348d2ec..a868c9823c10 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2410,28 +2410,32 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * i_count. * * Q: What is the difference between I_WILL_FREE and I_FREEING? + * + * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait + * upon. There's one free address left. */ -#define I_DIRTY_SYNC (1 << 0) -#define I_DIRTY_DATASYNC (1 << 1) -#define I_DIRTY_PAGES (1 << 2) -#define __I_NEW 3 +#define __I_NEW 0 #define I_NEW (1 << __I_NEW) -#define I_WILL_FREE (1 << 4) -#define I_FREEING (1 << 5) -#define I_CLEAR (1 << 6) -#define __I_SYNC 7 +#define __I_SYNC 1 #define I_SYNC (1 << __I_SYNC) -#define I_REFERENCED (1 << 8) +#define __I_LRU_ISOLATING 2 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) + +#define I_DIRTY_SYNC (1 << 3) +#define I_DIRTY_DATASYNC (1 << 4) +#define I_DIRTY_PAGES (1 << 5) +#define I_WILL_FREE (1 << 6) +#define I_FREEING (1 << 7) +#define I_CLEAR (1 << 8) +#define I_REFERENCED (1 << 9) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define I_WB_SWITCH (1 << 13) -#define I_OVL_INUSE (1 << 14) -#define I_CREATING (1 << 15) -#define I_DONTCACHE (1 << 16) -#define I_SYNC_QUEUED (1 << 17) -#define I_PINNING_NETFS_WB (1 << 18) -#define __I_LRU_ISOLATING 19 -#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) +#define I_WB_SWITCH (1 << 12) +#define I_OVL_INUSE (1 << 13) +#define I_CREATING (1 << 14) +#define I_DONTCACHE (1 << 15) +#define I_SYNC_QUEUED (1 << 16) +#define I_PINNING_NETFS_WB (1 << 17) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) -- cgit v1.2.3 From 0fe340a98b584a31b07c664dc5ff0c923730581e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 14:47:38 +0200 Subject: inode: port __I_NEW to var event Port the __I_NEW mechanism to use the new var event mechanism. Link: https://lore.kernel.org/r/20240823-work-i_state-v3-4-5cd5fd207a57@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/writeback.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 56b85841ae4c..8f651bb0a1a5 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -200,7 +200,8 @@ void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) { - wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); + wait_var_event(inode_state_wait_address(inode, __I_NEW), + !(READ_ONCE(inode->i_state) & I_NEW)); } #ifdef CONFIG_CGROUP_WRITEBACK -- cgit v1.2.3 From 2b111edbe0a9c441605be5cfb73001dc98ec686f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 14:47:40 +0200 Subject: inode: make i_state a u32 Now that we use the wait var event mechanism make i_state a u32 and free up 4 bytes. This means we currently have two 4 byte holes in struct inode which we can pack. 
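Worth spelling out, although it is only implied by the series: inode_state_wait_address() is (char *)&inode->i_state + bit, so with a 4-byte i_state only bit numbers 0..3 yield wait addresses that stay inside the field, which is why the earlier reordering moved __I_NEW, __I_SYNC and __I_LRU_ISOLATING to the front and notes that one free address remains. A compile-time guard along these lines (not something the series adds) would document the constraint:

#include <linux/build_bug.h>
#include <linux/fs.h>
#include <linux/stddef.h>

static inline void example_check_wait_bits(void)
{
        /* Bits used as wait addresses must lie within the (now u32) i_state. */
        BUILD_BUG_ON(__I_LRU_ISOLATING >= sizeof_field(struct inode, i_state));
}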
Link: https://lore.kernel.org/r/20240823-work-i_state-v3-6-5cd5fd207a57@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index a868c9823c10..a1ef8c65d828 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -681,7 +681,8 @@ struct inode { #endif /* Misc */ - unsigned long i_state; + u32 i_state; + /* 32-bit hole */ struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ -- cgit v1.2.3 From 459ca85ae1feff78d1518344df88bb79a092780c Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Wed, 28 Aug 2024 16:13:59 +0800 Subject: writeback: Refine the show_inode_state() macro definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the show_inode_state() macro only prints part of the state of inode->i_state. Let’s improve it to display more of its state. Signed-off-by: Julian Sun Link: https://lore.kernel.org/r/20240828081359.62429-1-sunjunchao2870@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/trace/events/writeback.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 54e353c9f919..a261e86e61fa 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -20,7 +20,15 @@ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ - {I_REFERENCED, "I_REFERENCED"} \ + {I_REFERENCED, "I_REFERENCED"}, \ + {I_LINKABLE, "I_LINKABLE"}, \ + {I_WB_SWITCH, "I_WB_SWITCH"}, \ + {I_OVL_INUSE, "I_OVL_INUSE"}, \ + {I_CREATING, "I_CREATING"}, \ + {I_DONTCACHE, "I_DONTCACHE"}, \ + {I_SYNC_QUEUED, "I_SYNC_QUEUED"}, \ + {I_PINNING_NETFS_WB, "I_PINNING_NETFS_WB"}, \ + {I_LRU_ISOLATING, "I_LRU_ISOLATING"} \ ) /* enums need to be exported to user space */ -- cgit v1.2.3 From 2a9f8995f7139a759fe8cb31d9e8a433757228ec Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Thu, 4 Jul 2024 19:23:18 +0200 Subject: mfd: 88pm80x: Constify read-only regmap structs `pm800_irq`, `pm805_irq` and `pm805_irq_chip` are not modified and can be declared as const to move their data to a read-only section. In order to keep the const modifier for the regmap_irq_chip structures, the pointer used to reference them must be converted to const as well. Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20240704-mfd-const-regmap_config-v2-8-0c8785b1331d@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/88pm80x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h index def5df6e74bf..551ef1c367d6 100644 --- a/include/linux/mfd/88pm80x.h +++ b/include/linux/mfd/88pm80x.h @@ -294,7 +294,7 @@ struct pm80x_chip { struct i2c_client *client; struct i2c_client *companion; struct regmap *regmap; - struct regmap_irq_chip *regmap_irq_chip; + const struct regmap_irq_chip *regmap_irq_chip; struct regmap_irq_chip_data *irq_data; int type; int irq; -- cgit v1.2.3 From 87b9c9ea01f4fd9193f30cc19bd03b05541ff4cb Mon Sep 17 00:00:00 2001 From: Wilken Gottwalt Date: Mon, 15 Jul 2024 08:17:28 +0000 Subject: mfd: ds1wm: Remove remaining header file The driver was removed after kernel 6.2 rendering the header file unused. 
Signed-off-by: Wilken Gottwalt Link: https://lore.kernel.org/r/ZpTbGHb6EX2Oe7ok@monster.localdomain Signed-off-by: Lee Jones --- include/linux/mfd/ds1wm.h | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 include/linux/mfd/ds1wm.h (limited to 'include') diff --git a/include/linux/mfd/ds1wm.h b/include/linux/mfd/ds1wm.h deleted file mode 100644 index 43dfca1c9702..000000000000 --- a/include/linux/mfd/ds1wm.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* MFD cell driver data for the DS1WM driver - * - * to be defined in the MFD device that is - * using this driver for one of his sub devices - */ - -struct ds1wm_driver_data { - int active_high; - int clock_rate; - /* in milliseconds, the amount of time to - * sleep following a reset pulse. Zero - * should work if your bus devices recover - * time respects the 1-wire spec since the - * ds1wm implements the precise timings of - * a reset pulse/presence detect sequence. - */ - unsigned int reset_recover_delay; - - /* Say 1 here for big endian Hardware - * (only relevant with bus-shift > 0 - */ - bool is_hw_big_endian; - - /* left shift of register number to get register address offsett. - * Only 0,1,2 allowed for 8,16 or 32 bit bus width respectively - */ - unsigned int bus_shift; -}; -- cgit v1.2.3 From 5e9629d0ae977d6f6916d7e519724804e95f0b07 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 27 Aug 2024 15:51:12 +0100 Subject: drivers/perf: arm_spe: Use perf_allow_kernel() for permissions Use perf_allow_kernel() for 'pa_enable' (physical addresses), 'pct_enable' (physical timestamps) and context IDs. This means that perf_event_paranoid is now taken into account and LSM hooks can be used, which is more consistent with other perf_event_open calls. For example PERF_SAMPLE_PHYS_ADDR uses perf_allow_kernel() rather than just perfmon_capable(). This also indirectly fixes the following error message which is misleading because perf_event_paranoid is not taken into account by perfmon_capable(): $ perf record -e arm_spe/pa_enable/ Error: Access to performance monitoring and observability operations is limited. Consider adjusting /proc/sys/kernel/perf_event_paranoid setting ... Suggested-by: Al Grant Signed-off-by: James Clark Link: https://lore.kernel.org/r/20240827145113.1224604-1-james.clark@linaro.org Link: https://lore.kernel.org/all/20240807120039.GD37996@noisy.programming.kicks-ass.net/ Signed-off-by: Will Deacon --- include/linux/perf_event.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a8942277dda..e336306b8c08 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1602,13 +1602,7 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } -static inline int perf_allow_kernel(struct perf_event_attr *attr) -{ - if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) - return -EACCES; - - return security_perf_event_open(attr, PERF_SECURITY_KERNEL); -} +int perf_allow_kernel(struct perf_event_attr *attr); static inline int perf_allow_cpu(struct perf_event_attr *attr) { -- cgit v1.2.3 From 6c17c7d5936e6af6a5bda9f9de98a5e2ee6e8a6f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 7 Aug 2024 15:19:20 -0300 Subject: iommu: Allow ATS to work on VFs when the PF uses IDENTITY PCI ATS has a global Smallest Translation Unit field that is located in the PF but shared by all of the VFs. 
The expectation is that the STU will be set to the root port's global STU capability which is driven by the IO page table configuration of the iommu HW. Today it becomes set when the iommu driver first enables ATS. Thus, to enable ATS on the VF, the PF must have already had the correct STU programmed, even if ATS is off on the PF. Unfortunately the PF only programs the STU when the PF enables ATS. The iommu drivers tend to leave ATS disabled when IDENTITY translation is being used. Thus we can get into a state where the PF is setup to use IDENTITY with the DMA API while the VF would like to use VFIO with a PAGING domain and have ATS turned on. This fails because the PF never loaded a PAGING domain and so it never setup the STU, and the VF can't do it. The simplest solution is to have the iommu driver set the ATS STU when it probes the device. This way the ATS STU is loaded immediately at boot time to all PFs and there is no issue when a VF comes to use it. Add a new call pci_prepare_ats() which should be called by iommu drivers in their probe_device() op for every PCI device if the iommu driver supports ATS. This will setup the STU based on whatever page size capability the iommu HW has. Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-0fb4d2ab6770+7e706-ats_vf_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/pci-ats.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index df54cd5b15db..0e8b74e63767 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -8,6 +8,7 @@ /* Address Translation Service */ bool pci_ats_supported(struct pci_dev *dev); int pci_enable_ats(struct pci_dev *dev, int ps); +int pci_prepare_ats(struct pci_dev *dev, int ps); void pci_disable_ats(struct pci_dev *dev); int pci_ats_queue_depth(struct pci_dev *dev); int pci_ats_page_aligned(struct pci_dev *dev); @@ -16,6 +17,8 @@ static inline bool pci_ats_supported(struct pci_dev *d) { return false; } static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } +static inline int pci_prepare_ats(struct pci_dev *dev, int ps) +{ return -ENODEV; } static inline void pci_disable_ats(struct pci_dev *d) { } static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } -- cgit v1.2.3 From 5c40e050e6ac0218af7c520095729d440cc87e6b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 29 Aug 2024 15:06:40 +0200 Subject: fs: drop GFP_NOFAIL mode from alloc_page_buffers There is only one called of alloc_page_buffers and it doesn't require __GFP_NOFAIL so drop this allocation mode. 
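Back to the pci_prepare_ats() addition above: the intended caller is an IOMMU driver's probe_device() path, so the PF's STU is programmed at boot regardless of whether ATS is ever enabled on the PF itself. A driver-agnostic sketch; using PAGE_SHIFT as the IOMMU's smallest page shift is an assumption:

#include <linux/pci.h>
#include <linux/pci-ats.h>

/* Called from the driver's probe_device() op for every PCI device. */
static void example_prepare_ats(struct device *dev)
{
        if (!dev_is_pci(dev))
                return;

        /* 'ps' is the log2 of the smallest IO page size the IOMMU supports;
         * PAGE_SHIFT is only a placeholder for that value here. */
        pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);
}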
Signed-off-by: Michal Hocko Link: https://lore.kernel.org/r/20240829130640.1397970-1-mhocko@kernel.org Acked-by: Song Liu Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 14acf1bbe0ce..7e903457967a 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -199,8 +199,7 @@ void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset); struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, gfp_t gfp); -struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, - bool retry); +struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size); struct buffer_head *create_empty_buffers(struct folio *folio, unsigned long blocksize, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); -- cgit v1.2.3 From 1a5caec7f80ca2e659c03f45378ee26915f4eda2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 30 Aug 2024 07:35:12 -0700 Subject: regulator: core: Stub devm_regulator_bulk_get_const() if !CONFIG_REGULATOR When adding devm_regulator_bulk_get_const() I missed adding a stub for when CONFIG_REGULATOR is not enabled. Under certain conditions (like randconfig testing) this can cause the compiler to reports errors like: error: implicit declaration of function 'devm_regulator_bulk_get_const'; did you mean 'devm_regulator_bulk_get_enable'? Add the stub. Fixes: 1de452a0edda ("regulator: core: Allow drivers to define their init data as const") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408301813.TesFuSbh-lkp@intel.com/ Cc: Neil Armstrong Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20240830073511.1.Ib733229a8a19fad8179213c05e1af01b51e42328@changeid Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index d986ec13092e..b9ce521910a0 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -452,6 +452,14 @@ static inline int of_regulator_bulk_get_all(struct device *dev, struct device_no return 0; } +static inline int devm_regulator_bulk_get_const( + struct device *dev, int num_consumers, + const struct regulator_bulk_data *in_consumers, + struct regulator_bulk_data **out_consumers) +{ + return 0; +} + static inline int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { -- cgit v1.2.3 From a06c3fad49a50d5d5eb078f93e70f4d3eca5d5a5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2024 14:01:45 +0100 Subject: drivers/virt: pkvm: Add initial support for running as a protected guest Implement a pKVM protected guest driver to probe the presence of pKVM and determine the memory protection granule using the HYP_MEMINFO hypercall. 
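For orientation, probing the granule from the guest would look roughly like the sketch below. The function ID is the one added here; passing zero arguments and reading the granule size in bytes from a0 are assumptions, and a real caller would first check the bitmap advertised by the KVM vendor-hyp FEATURES call.

#include <linux/arm-smccc.h>

static unsigned long example_pkvm_protection_granule(void)
{
        struct arm_smccc_res res;

        arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID,
                             0, 0, 0, &res);
        if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
                return 0;

        return res.a0;  /* assumed: protection granule in bytes */
}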
Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240830130150.8568-3-will@kernel.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 083f85653716..16b6dcc54e02 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -115,6 +115,7 @@ /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 #define ARM_SMCCC_KVM_FUNC_PTP 1 +#define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -137,6 +138,12 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_PTP) +#define ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_HYP_MEMINFO) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 -- cgit v1.2.3 From ebc59b120c588156feb7ce194a9636584ced18ba Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2024 14:01:47 +0100 Subject: drivers/virt: pkvm: Hook up mem_encrypt API using pKVM hypercalls If we detect the presence of pKVM's SHARE and UNSHARE hypercalls, then register a backend implementation of the mem_encrypt API so that things like DMA buffers can be shared appropriately with the host. Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240830130150.8568-5-will@kernel.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 16b6dcc54e02..9cb7c95920b0 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -116,6 +116,8 @@ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 #define ARM_SMCCC_KVM_FUNC_PTP 1 #define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 +#define ARM_SMCCC_KVM_FUNC_MEM_SHARE 3 +#define ARM_SMCCC_KVM_FUNC_MEM_UNSHARE 4 #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -144,6 +146,18 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_HYP_MEMINFO) +#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MEM_SHARE) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MEM_UNSHARE) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 -- cgit v1.2.3 From 0f12694958001c96bda811473fdb23f333c6d3ca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2024 14:01:49 +0100 Subject: drivers/virt: pkvm: Intercept ioremap using pKVM MMIO_GUARD hypercall Hook up pKVM's MMIO_GUARD hypercall so that ioremap() and friends will register the target physical address as MMIO with the hypervisor, allowing guest exits to that page to be emulated by the host with full syndrome information. 
Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240830130150.8568-7-will@kernel.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 9cb7c95920b0..e93c1f7cea70 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -118,6 +118,7 @@ #define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 #define ARM_SMCCC_KVM_FUNC_MEM_SHARE 3 #define ARM_SMCCC_KVM_FUNC_MEM_UNSHARE 4 +#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD 7 #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -158,6 +159,12 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_MEM_UNSHARE) +#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MMIO_GUARD) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 -- cgit v1.2.3 From 21be9f7110d4c044c2b49bafbd7246335f236221 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2024 14:01:50 +0100 Subject: arm64: smccc: Reserve block of KVM "vendor" services for pKVM hypercalls pKVM relies on hypercalls to expose services such as memory sharing to protected guests. Tentatively allocate a block of 58 hypercalls (i.e. fill the remaining space in the first 64 function IDs) for pKVM usage, as future extensions such as pvIOMMU support, range-based memory sharing and validation of assigned devices will require additional services. Suggested-by: Marc Zyngier Link: https://lore.kernel.org/r/86a5h5yg5y.wl-maz@kernel.org Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240830130150.8568-8-will@kernel.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index e93c1f7cea70..f59099a213d0 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -115,10 +115,70 @@ /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 #define ARM_SMCCC_KVM_FUNC_PTP 1 +/* Start of pKVM hypercall range */ #define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 #define ARM_SMCCC_KVM_FUNC_MEM_SHARE 3 #define ARM_SMCCC_KVM_FUNC_MEM_UNSHARE 4 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_5 5 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_6 6 #define ARM_SMCCC_KVM_FUNC_MMIO_GUARD 7 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_8 8 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_9 9 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_10 10 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_11 11 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_12 12 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_13 13 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_14 14 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_15 15 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_16 16 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_17 17 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_18 18 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_19 19 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_20 20 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_21 21 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_22 22 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_23 23 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_24 24 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_25 25 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_26 26 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_27 27 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_28 28 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_29 29 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_30 30 +#define 
ARM_SMCCC_KVM_FUNC_PKVM_RESV_31 31 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_32 32 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_33 33 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_34 34 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_35 35 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_36 36 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_37 37 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_38 38 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_39 39 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_40 40 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_41 41 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_42 42 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_43 43 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_44 44 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_45 45 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_46 46 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_47 47 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_48 48 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_49 49 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_50 50 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_51 51 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_52 52 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_53 53 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_54 54 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_55 55 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_56 56 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_57 57 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_58 58 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_59 59 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_60 60 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_61 61 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_62 62 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_63 63 +/* End of pKVM hypercall range */ #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 -- cgit v1.2.3 From 929c81ade6355b87097a2a4886c10750e68626cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 30 Aug 2024 11:45:57 +0200 Subject: fbdev: Introduce devm_register_framebuffer() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a device-managed variant of register_framebuffer() which automatically unregisters the framebuffer on device destruction. This can simplify the error handling and resource management in drivers. Signed-off-by: Thomas Weißschuh Signed-off-by: Helge Deller --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index db7d97b10964..abf6643ebcaf 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -601,6 +601,7 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, /* fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); +extern int devm_register_framebuffer(struct device *dev, struct fb_info *fb_info); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); -- cgit v1.2.3 From d7eafed3223af19add14b67a390ec2b983d890e0 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 7 Aug 2024 14:04:58 +0100 Subject: drm/msm: Expose expanded UBWC config uapi This adds extra parameters that affect UBWC tiling that will be used by the Mesa implementation of VK_EXT_host_image_copy. 
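From userspace (for example Mesa) the new values are read through the existing DRM_MSM_GET_PARAM path; a rough libdrm sketch, with error handling trimmed and the uapi include path depending on the build environment:

#include <stdio.h>
#include <xf86drm.h>
#include <drm/msm_drm.h>        /* uapi header; exact include path may vary */

static void example_query_ubwc(int fd)
{
        struct drm_msm_param req = {
                .pipe  = MSM_PIPE_3D0,
                .param = MSM_PARAM_UBWC_SWIZZLE,
        };

        if (drmCommandWriteRead(fd, DRM_MSM_GET_PARAM, &req, sizeof(req)) == 0)
                printf("UBWC swizzle: 0x%llx\n", (unsigned long long)req.value);
}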
Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/607401/ Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 3fca72f73861..2377147b6af0 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -88,6 +88,8 @@ struct drm_msm_timespec { #define MSM_PARAM_VA_SIZE 0x0f /* RO: size of valid GPU iova range (bytes) */ #define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */ #define MSM_PARAM_RAYTRACING 0x11 /* RO */ +#define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */ +#define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From 9aee8262445d185960431e972e2d997e6aba3de0 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:58:23 +0800 Subject: ARM: OMAP2+: Remove obsoleted declaration for gpmc_onenand_init The gpmc_onenand_init() have been removed since commit 2514830b8b8c ("ARM: OMAP2+: Remove gpmc-onenand"), and now it is useless, so remove it. Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20240826035823.4043171-1-cuigaosheng1@huawei.com Signed-off-by: Kevin Hilman --- include/linux/omap-gpmc.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 082841908fe7..c9e3843d2dd5 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -84,13 +84,3 @@ extern void gpmc_read_settings_dt(struct device_node *np, struct gpmc_timings; struct omap_nand_platform_data; struct omap_onenand_platform_data; - -#if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); -#else -#define board_onenand_data NULL -static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) -{ - return 0; -} -#endif -- cgit v1.2.3 From 8c2bd38b95f75f3d2a08c93e35303e26d480d24e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2024 14:46:39 +0000 Subject: icmp: change the order of rate limits ICMP messages are ratelimited : After the blamed commits, the two rate limiters are applied in this order: 1) host wide ratelimit (icmp_global_allow()) 2) Per destination ratelimit (inetpeer based) In order to avoid side-channels attacks, we need to apply the per destination check first. This patch makes the following change : 1) icmp_global_allow() checks if the host wide limit is reached. But credits are not yet consumed. This is deferred to 3) 2) The per destination limit is checked/updated. This might add a new node in inetpeer tree. 3) icmp_global_consume() consumes tokens if prior operations succeeded. This means that host wide ratelimit is still effective in keeping inetpeer tree small even under DDOS. As a bonus, I removed icmp_global.lock as the fast path can use a lock-free operation. 
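The new ordering is easiest to see as code. A simplified sketch of the allow/consume pairing after this patch (the inetpeer lookup and the per-netns conversion from the follow-up patches are omitted):

#include <net/ip.h>
#include <net/inetpeer.h>

static bool example_icmp_allow(struct inet_peer *peer, int ratelimit)
{
        /* 1) host-wide limit: test only, no credits consumed yet */
        if (!icmp_global_allow())
                return false;

        /* 2) per-destination (inetpeer based) limit */
        if (!inet_peer_xrlim_allow(peer, ratelimit))
                return false;

        /* 3) both passed: now consume the host-wide credits */
        icmp_global_consume();
        return true;
}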
Fixes: c0303efeab73 ("net: reduce cycles spend on ICMP replies that gets rate limited") Fixes: 4cdf507d5452 ("icmp: add a global rate limitation") Reported-by: Keyu Man Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Cc: Jesper Dangaard Brouer Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20240829144641.3880376-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index c5606cadb1a5..82248813619e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -795,6 +795,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) } bool icmp_global_allow(void); +void icmp_global_consume(void); + extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; -- cgit v1.2.3 From b056b4cd9178f7a1d5d57f7b48b073c29729ddaa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2024 14:46:40 +0000 Subject: icmp: move icmp_global.credit and icmp_global.stamp to per netns storage Host wide ICMP ratelimiter should be per netns, to provide better isolation. Following patch in this series makes the sysctl per netns. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20240829144641.3880376-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 4 ++-- include/net/netns/ipv4.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 82248813619e..d3bca4e83979 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -794,8 +794,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } -bool icmp_global_allow(void); -void icmp_global_consume(void); +bool icmp_global_allow(struct net *net); +void icmp_global_consume(struct net *net); extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 5fcd61ada622..54fe7c079fff 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -122,7 +122,8 @@ struct netns_ipv4 { u8 sysctl_icmp_errors_use_inbound_ifaddr; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; - + atomic_t icmp_global_credit; + u32 icmp_global_stamp; u32 ip_rt_min_pmtu; int ip_rt_mtu_expires; int ip_rt_min_advmss; -- cgit v1.2.3 From f17bf505ff89595df5147755e51441632a5dc563 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2024 14:46:41 +0000 Subject: icmp: icmp_msgs_per_sec and icmp_msgs_burst sysctls become per netns Previous patch made ICMP rate limits per netns, it makes sense to allow each netns to change the associated sysctl. 
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20240829144641.3880376-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 3 --- include/net/netns/ipv4.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index d3bca4e83979..1ee472fa8b37 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -797,9 +797,6 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) bool icmp_global_allow(struct net *net); void icmp_global_consume(struct net *net); -extern int sysctl_icmp_msgs_per_sec; -extern int sysctl_icmp_msgs_burst; - #ifdef CONFIG_PROC_FS int ip_misc_proc_init(void); #endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 54fe7c079fff..276f622f3516 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -122,6 +122,8 @@ struct netns_ipv4 { u8 sysctl_icmp_errors_use_inbound_ifaddr; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; + int sysctl_icmp_msgs_per_sec; + int sysctl_icmp_msgs_burst; atomic_t icmp_global_credit; u32 icmp_global_stamp; u32 ip_rt_min_pmtu; -- cgit v1.2.3 From ddc94d0b17e8ea8179ecbbefacac3fba0fb77265 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Aug 2024 10:01:43 -0700 Subject: dma-buf: Split out dma fence array create into alloc and arm functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Useful to preallocate dma fence array and then arm in path of reclaim or a dma fence. v2: - s/arm/init (Christian) - Drop !array warn (Christian) v3: - Fix kernel doc typos (dim) Cc: Sumit Semwal Cc: Christian König Signed-off-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240826170144.2492062-2-matthew.brost@intel.com --- include/linux/dma-fence-array.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 29c5650c1038..079b3dec0a16 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -79,6 +79,12 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index)) +struct dma_fence_array *dma_fence_array_alloc(int num_fences); +void dma_fence_array_init(struct dma_fence_array *array, + int num_fences, struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any); + struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno, -- cgit v1.2.3 From 1abab1ba0775036bb67c6c57945c637be644c04f Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Fri, 30 Aug 2024 10:02:28 +0000 Subject: cgroup/cpuset: guard cpuset-v1 code under CONFIG_CPUSETS_V1 This patch introduces CONFIG_CPUSETS_V1 and guard cpuset-v1 code under CONFIG_CPUSETS_V1. The default value of CONFIG_CPUSETS_V1 is N, so that user who adopted v2 don't have 'pay' for cpuset v1. 
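To illustrate the dma-fence split above (a sketch, not taken from the patch): the allocation can be done early, in a context that is still allowed to allocate, and the array armed later, for instance on the signalling or reclaim path, without any further allocation. Lifetime of the fences array follows the usual dma_fence_array rules.

#include <linux/dma-fence.h>
#include <linux/dma-fence-array.h>

static struct dma_fence_array *example_build_array(struct dma_fence **fences,
                                                   int num_fences)
{
        struct dma_fence_array *array;

        /* May allocate: do this before entering the reclaim/fence path. */
        array = dma_fence_array_alloc(num_fences);
        if (!array)
                return NULL;

        /* Later, from the critical path: initialization only, no allocation. */
        dma_fence_array_init(array, num_fences, fences,
                             dma_fence_context_alloc(1), 1, false);

        return array;
}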
Signed-off-by: Chen Ridong Acked-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 2a6981eeebf8..835e7b793f6a 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -99,6 +99,7 @@ static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, const struct task_struct *tsk2); +#ifdef CONFIG_CPUSETS_V1 #define cpuset_memory_pressure_bump() \ do { \ if (cpuset_memory_pressure_enabled) \ @@ -106,6 +107,9 @@ extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, } while (0) extern int cpuset_memory_pressure_enabled; extern void __cpuset_memory_pressure_bump(void); +#else +static inline void cpuset_memory_pressure_bump(void) { } +#endif extern void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task); -- cgit v1.2.3 From 7eba264a3c102b6c006aa429f7eb25d2b8533da7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 29 Aug 2024 17:10:49 +0100 Subject: mac802154: Correct spelling in mac802154.h Correct spelling in mac802154.h. As reported by codespell. Signed-off-by: Simon Horman Message-ID: <20240829-wpan-spell-v1-1-799d840e02c4@kernel.org> Signed-off-by: Stefan Schmidt --- include/net/mac802154.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 4a3a9de9da73..1b5488fa2ff0 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -140,7 +140,7 @@ enum ieee802154_hw_flags { * * xmit_sync: * Handler that 802.15.4 module calls for each transmitted frame. - * skb cntains the buffer starting from the IEEE 802.15.4 header. + * skb contains the buffer starting from the IEEE 802.15.4 header. * The low-level driver should send the frame based on available * configuration. This is called by a workqueue and useful for * synchronous 802.15.4 drivers. @@ -152,7 +152,7 @@ enum ieee802154_hw_flags { * * xmit_async: * Handler that 802.15.4 module calls for each transmitted frame. - * skb cntains the buffer starting from the IEEE 802.15.4 header. + * skb contains the buffer starting from the IEEE 802.15.4 header. * The low-level driver should send the frame based on available * configuration. * This function should return zero or negative errno. -- cgit v1.2.3 From 3682c302e72d71abeb17a81a6d29f281b22928e2 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 29 Aug 2024 17:10:50 +0100 Subject: ieee802154: Correct spelling in nl802154.h Correct spelling in nl802154.h. As reported by codespell. 
Signed-off-by: Simon Horman Message-ID: <20240829-wpan-spell-v1-2-799d840e02c4@kernel.org> Signed-off-by: Stefan Schmidt --- include/net/nl802154.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 4c752f799957..a994dea74596 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -192,7 +192,7 @@ enum nl802154_iftype { * @NL802154_CAP_ATTR_TX_POWERS: a nested attribute for * nl802154_wpan_phy_tx_power * @NL802154_CAP_ATTR_MIN_CCA_ED_LEVEL: minimum value for cca_ed_level - * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maxmimum value for cca_ed_level + * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maximum value for cca_ed_level * @NL802154_CAP_ATTR_CCA_MODES: nl802154_cca_modes flags * @NL802154_CAP_ATTR_CCA_OPTS: nl802154_cca_opts flags * @NL802154_CAP_ATTR_MIN_MINBE: minimum of minbe value -- cgit v1.2.3 From c898f6d7b093bd71e66569cd6797c87d4056f44b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 26 Aug 2024 15:47:30 -0400 Subject: Bluetooth: hci_sync: Introduce hci_cmd_sync_run/hci_cmd_sync_run_once This introduces hci_cmd_sync_run/hci_cmd_sync_run_once which acts like hci_cmd_sync_queue/hci_cmd_sync_queue_once but runs immediately when already on hdev->cmd_sync_work context. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 75e052909b5f..f3052cb252ef 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -73,6 +73,10 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); struct hci_cmd_sync_work_entry * hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); -- cgit v1.2.3 From 532f8bcd1c2c4e8112f62e1922fd1703bc0ffce0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 27 Aug 2024 14:37:22 -0400 Subject: Revert "Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE" This reverts commit 59b047bc98084f8af2c41483e4d68a5adf2fa7f7 which breaks compatibility with commands like: bluetoothd[46328]: @ MGMT Command: Load.. 
(0x0013) plen 74 {0x0001} [hci0] Keys: 2 BR/EDR Address: C0:DC:DA:A5:E5:47 (Samsung Electronics Co.,Ltd) Key type: Authenticated key from P-256 (0x03) Central: 0x00 Encryption size: 16 Diversifier[2]: 0000 Randomizer[8]: 0000000000000000 Key[16]: 6ed96089bd9765be2f2c971b0b95f624 LE Address: D7:2A:DE:1E:73:A2 (Static) Key type: Unauthenticated key from P-256 (0x02) Central: 0x00 Encryption size: 16 Diversifier[2]: 0000 Randomizer[8]: 0000000000000000 Key[16]: 87dd2546ededda380ffcdc0a8faa4597 @ MGMT Event: Command Status (0x0002) plen 3 {0x0001} [hci0] Load Long Term Keys (0x0013) Status: Invalid Parameters (0x0d) Cc: stable@vger.kernel.org Link: https://github.com/bluez/bluez/issues/875 Fixes: 59b047bc9808 ("Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e449dba698f3..1a32e602630e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -186,7 +186,6 @@ struct blocked_key { struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; - u8 link_type; u8 type; u8 val[16]; }; @@ -196,7 +195,6 @@ struct smp_ltk { struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; - u8 link_type; u8 authenticated; u8 type; u8 enc_size; @@ -211,7 +209,6 @@ struct smp_irk { bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; - u8 link_type; u8 val[16]; }; @@ -219,8 +216,6 @@ struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; - u8 bdaddr_type; - u8 link_type; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; -- cgit v1.2.3 From ff95cb5e521b60d046e6571ada697a0977b189c3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 29 Aug 2024 09:54:51 +0300 Subject: ipv4: Unmask upper DSCP bits in ip_sock_rt_tos() The function is used to read the DS field that was stored in IPv4 sockets via the IP_TOS socket option so that it could be used to initialize the flowi4_tos field before resolving an output route. Unmask the upper DSCP bits so that in the future the output route lookup could be performed according to the full DSCP value. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/route.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 93833cfe9c96..b896f086ec8e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk) static inline __u8 ip_sock_rt_tos(const struct sock *sk) { - return RT_TOS(READ_ONCE(inet_sk(sk)->tos)); + return READ_ONCE(inet_sk(sk)->tos) & INET_DSCP_MASK; } struct ip_tunnel_info; -- cgit v1.2.3 From 356d054a4967e6190ee558b8e839fad3e9db35ec Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 29 Aug 2024 09:54:52 +0300 Subject: ipv4: Unmask upper DSCP bits in get_rttos() The function is used by a few socket types to retrieve the TOS value with which to perform the FIB lookup for packets sent through the socket (flowi4_tos). If a DS field was passed using the IP_TOS control message, then it is used. Otherwise the one specified via the IP_TOS socket option. Unmask the upper DSCP bits so that in the future the lookup could be performed according to the full DSCP value. 
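To make the masking point concrete, a worked example (values computed here, not taken from the patches), using EF, i.e. DSCP 46:

/*
 *      tos = 46 << 2                      = 0xb8
 *      old: RT_TOS(0xb8) = 0xb8 & 0x1e    = 0x18   (IPTOS_TOS_MASK)
 *      new: 0xb8 & INET_DSCP_MASK (0xfc)  = 0xb8
 *
 * RT_TOS() silently dropped the upper DSCP bits before the FIB lookup;
 * masking with INET_DSCP_MASK keeps the full 6-bit DSCP and still strips
 * the two ECN bits.
 */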
Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/ip.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 1ee472fa8b37..d92d3bc3ec0e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -33,6 +33,7 @@ #include #include #include +#include #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ @@ -258,7 +259,9 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { - return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos)); + u8 dsfield = ipc->tos != -1 ? ipc->tos : READ_ONCE(inet->tos); + + return dsfield & INET_DSCP_MASK; } /* datagram.c */ -- cgit v1.2.3 From b261b2c6c18bcb81d69de011fd991bdfb97259f7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 29 Aug 2024 09:54:54 +0300 Subject: xfrm: Unmask upper DSCP bits in xfrm_get_tos() The function returns a value that is used to initialize 'flowi4_tos' before being passed to the FIB lookup API in the following call chain: xfrm_bundle_create() tos = xfrm_get_tos(fl, family) xfrm_dst_lookup(..., tos, ...) __xfrm_dst_lookup(..., tos, ...) xfrm4_dst_lookup(..., tos, ...) __xfrm4_dst_lookup(..., tos, ...) fl4->flowi4_tos = tos __ip_route_output_key(net, fl4) Unmask the upper DSCP bits so that in the future the output route lookup could be performed according to the full DSCP value. Remove IPTOS_RT_MASK since it is no longer used. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/route.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index b896f086ec8e..1789f1e6640b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -266,8 +266,6 @@ static inline void ip_rt_put(struct rtable *rt) dst_release(&rt->dst); } -#define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) - extern const __u8 ip_tos2prio[16]; static inline char rt_tos2priority(u8 tos) -- cgit v1.2.3 From 0328947c50324cf4b2d8b181bf948edb8101f59f Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 28 Aug 2024 21:16:15 +0530 Subject: PCI: endpoint: Assign PCI domain number for endpoint controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, PCI endpoint subsystem doesn't assign PCI domain number for the PCI endpoint controllers. But this domain number could be useful to the EPC drivers to uniquely identify each controller based on the hardware instance when there are multiple ones present in an SoC (even multiple RC/EP). So let's make use of the existing pci_bus_find_domain_nr() API to allocate domain numbers based on either devicetree (linux,pci-domain) property or dynamic domain number allocation scheme. It should be noted that the domain number allocated by this API will be based on both RC and EP controllers in a SoC. If the 'linux,pci-domain' DT property is present, then the domain number represents the actual hardware instance of the PCI endpoint controller. If not, then the domain number will be allocated based on the PCI EP/RC controller probe order. If the architecture doesn't support CONFIG_PCI_DOMAINS_GENERIC (rare), then currently a warning is thrown to indicate that the architecture specific implementation is needed. 
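A sketch of the derivation described above; the exact call site inside the EPC core is an assumption, but pci_bus_find_domain_nr() is the API named in the message and, on devicetree platforms, honours "linux,pci-domain" before falling back to dynamic allocation:

#include <linux/pci.h>

static int example_epc_domain_nr(struct device *dev)
{
#ifdef CONFIG_PCI_DOMAINS_GENERIC
        /* 'dev' is the EP controller's device; no struct pci_bus exists here. */
        return pci_bus_find_domain_nr(NULL, dev);
#else
        /* Architecture-specific support would be needed, as warned above. */
        return 0;
#endif
}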
Link: https://lore.kernel.org/linux-pci/20240828-pci-qcom-hotplug-v4-5-263a385fbbcb@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Reviewed-by: Frank Li --- include/linux/pci-epc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 85bdf2adb760..8e3dcac55dcd 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -128,6 +128,7 @@ struct pci_epc_mem { * @group: configfs group representing the PCI EPC device * @lock: mutex to protect pci_epc ops * @function_num_map: bitmap to manage physical function number + * @domain_nr: PCI domain number of the endpoint controller * @init_complete: flag to indicate whether the EPC initialization is complete * or not */ @@ -145,6 +146,7 @@ struct pci_epc { /* mutex to protect against concurrent access of EP controller */ struct mutex lock; unsigned long function_num_map; + int domain_nr; bool init_complete; }; -- cgit v1.2.3 From cef48236dfe55fa266d505e8a497963a7bc5ef2a Mon Sep 17 00:00:00 2001 From: Chen Hanxiao Date: Thu, 18 Jul 2024 15:06:16 +0800 Subject: NFS: trace: show TIMEDOUT instead of 0x6e __nfs_revalidate_inode may return ETIMEDOUT. print symbol of ETIMEDOUT in nfs trace: before: cat-5191 [005] 119.331127: nfs_revalidate_inode_exit: error=-110 (0x6e) after: cat-1738 [004] 44.365509: nfs_revalidate_inode_exit: error=-110 (TIMEDOUT) Signed-off-by: Chen Hanxiao Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/trace/misc/nfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h index 7b221d51133a..c82233e950ac 100644 --- a/include/trace/misc/nfs.h +++ b/include/trace/misc/nfs.h @@ -51,6 +51,7 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); { NFSERR_IO, "IO" }, \ { NFSERR_NXIO, "NXIO" }, \ { ECHILD, "CHILD" }, \ + { ETIMEDOUT, "TIMEDOUT" }, \ { NFSERR_EAGAIN, "AGAIN" }, \ { NFSERR_ACCES, "ACCES" }, \ { NFSERR_EXIST, "EXIST" }, \ -- cgit v1.2.3 From 4ed9ef32606386efc562bada891d7baa16fc46b4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Jul 2024 17:14:14 +1000 Subject: lockd: discard nlmsvc_timeout nlmsvc_timeout always has the same value as (nlm_timeout * HZ), so use that in the one place that nlmsvc_timeout is used. In truth it *might* not always be the same as nlmsvc_timeout is only set when lockd is started while nlm_timeout can be set at anytime via sysctl. I think this difference it not helpful so removing it is good. Also remove the test for nlm_timout being 0. This is not possible - unless a module parameter is used to set the minimum timeout to 0, and if that happens then it probably should be honoured. 
Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/lockd/lockd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 1b95fe31051f..61c4b9c41904 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -200,7 +200,7 @@ extern const struct svc_procedure nlmsvc_procedures[24]; extern const struct svc_procedure nlmsvc_procedures4[24]; #endif extern int nlmsvc_grace_period; -extern unsigned long nlmsvc_timeout; +extern unsigned long nlm_timeout; extern bool nsm_use_hostnames; extern u32 nsm_local_state; -- cgit v1.2.3 From f2b27e1d72527f94f030b6356b3187576e60885b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Jul 2024 17:14:15 +1000 Subject: SUNRPC: make various functions static, or not exported. Various functions are only used within the sunrpc module, and several are only use in the one file. So clean up: These are marked static, and any EXPORT is removed. svc_rcpb_setup() svc_rqst_alloc() svc_rqst_free() - also moved before first use svc_rpcbind_set_version() svc_drop() - also moved to svc.c These are now not EXPORTed, but are not static. svc_authenticate() svc_sock_update_bufs() Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 9 --------- include/linux/sunrpc/svcauth.h | 1 - include/linux/sunrpc/svcsock.h | 2 -- 3 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a7d0406b9ef5..e4fa25fafa97 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -401,17 +401,13 @@ struct svc_procedure { */ int sunrpc_set_pool_mode(const char *val); int sunrpc_get_pool_mode(char *val, size_t size); -int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, int (*threadfn)(void *data)); -struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, - struct svc_pool *pool, int node); bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_release_pages(struct svc_rqst *rqstp); -void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *prog, struct svc_stat *stats, @@ -446,11 +442,6 @@ int svc_generic_rpcbind_set(struct net *net, u32 version, int family, unsigned short proto, unsigned short port); -int svc_rpcbind_set_version(struct net *net, - const struct svc_program *progp, - u32 version, int family, - unsigned short proto, - unsigned short port); #define RPC_MAX_ADDRBUFLEN (63U) diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 61c455f1e1f5..63cf6fb26dcc 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -151,7 +151,6 @@ struct auth_ops { struct svc_xprt; -extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp); extern rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp); extern int svc_authorise(struct svc_rqst *rqstp); extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 7c78ec6356b9..bf45d9e8492a 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -58,8 +58,6 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk) */ void 
svc_recv(struct svc_rqst *rqstp); void svc_send(struct svc_rqst *rqstp); -void svc_drop(struct svc_rqst *); -void svc_sock_update_bufs(struct svc_serv *serv); int svc_addsock(struct svc_serv *serv, struct net *net, const int fd, char *name_return, const size_t len, const struct cred *cred); -- cgit v1.2.3 From 5fe690a5946442f7f2d08448341dd621367f80bc Mon Sep 17 00:00:00 2001 From: Matthew Cassell Date: Mon, 22 Jul 2024 17:13:16 +0000 Subject: mm: add node_reclaim successes to VM event counters /proc/vmstat currently shows the number of node_reclaim() failures when vm.zone_reclaim_mode is set appropriately. It would be convenient to have the number of successes right next to zone_reclaim_failed (similar to compaction and migration). While this is just a trivial addition to the vmstat file, it was helpful during benchmarking to not have to probe node_reclaim() to observe the success/failure ratio. Link: https://lkml.kernel.org/r/20240722171316.7517-1-mcassell411@gmail.com Signed-off-by: Matthew Cassell Cc: Domenico Cerasuolo Cc: "Huang, Ying" Cc: Li Zhijian Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/vm_event_item.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 747943bc8cc2..6fff8ed3b1fd 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -50,6 +50,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSTEAL_ANON, PGSTEAL_FILE, #ifdef CONFIG_NUMA + PGSCAN_ZONE_RECLAIM_SUCCESS, PGSCAN_ZONE_RECLAIM_FAILED, #endif PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL, -- cgit v1.2.3 From 3ddc2fefe6f34ec870fcb22f4cd656e53db079f2 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 22 Jul 2024 18:29:23 +0200 Subject: mm: vmalloc: implement vrealloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Align kvrealloc() with krealloc()", v2. Besides the obvious (and desired) difference between krealloc() and kvrealloc(), there is some inconsistency in their function signatures and behavior: - krealloc() frees the memory when the requested size is zero, whereas kvrealloc() simply returns a pointer to the existing allocation. - krealloc() behaves like kmalloc() if a NULL pointer is passed, whereas kvrealloc() does not accept a NULL pointer at all and, if passed, would fault instead. - krealloc() is self-contained, whereas kvrealloc() relies on the caller to provide the size of the previous allocation. Inconsistent behavior throughout allocation APIs is error prone, hence make kvrealloc() behave like krealloc(), which seems superior in all mentioned aspects. In order to be able to get rid of kvrealloc()'s oldsize parameter, introduce vrealloc() and make use of it in kvrealloc(). Making use of vrealloc() in kvrealloc() also provides opportunities to grow (and shrink) allocations more efficiently. For instance, vrealloc() can be optimized to allocate and map additional pages to grow the allocation or unmap and free unused pages to shrink the allocation. Besides the above, those functions are required by Rust's allocator abstractions [1] (rework based on this series in [2]). With `Vec` or `KVec` respectively, potentially growing (and shrinking) data structures are rather common. [1] https://lore.kernel.org/lkml/20240704170738.3621-1-dakr@redhat.com/ [2] https://git.kernel.org/pub/scm/linux/kernel/git/dakr/linux.git/log/?h=rust/mm This patch (of 2): Implement vrealloc() analogous to krealloc().
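As an illustrative sketch of the intended calling convention (not taken from this patch, and assuming vrealloc() keeps krealloc()'s failure semantics, i.e. the original allocation is left intact if the resize fails):

	void *new;

	new = vrealloc(buf, new_size, GFP_KERNEL);
	if (!new)
		return -ENOMEM;	/* 'buf' is still valid and owned by the caller */
	buf = new;

The caller no longer has to remember the size of the previous allocation itself.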
Currently, kvrealloc() requires the caller to pass the size of the previous memory allocation, which, instead, should be self-contained. We attempt to fix this in a subsequent patch which, in order to do so, requires vrealloc(). Besides that, we need realloc() functions for kernel allocators in Rust too. With `Vec` or `KVec` respectively, potentially growing (and shrinking) data structures are rather common. [dakr@kernel.org: fix missing nommu implementation] Link: https://lkml.kernel.org/r/20240725141227.13954-1-dakr@kernel.org [dakr@kernel.org: document concurrency restrictions] Link: https://lkml.kernel.org/r/20240725125442.4957-1-dakr@kernel.org [dakr@kernel.org: consider spare memory for __GFP_ZERO] Link: https://lkml.kernel.org/r/20240730185049.6244-3-dakr@kernel.org [dakr@kernel.org: properly document __GFP_ZERO behavior] Link: https://lkml.kernel.org/r/20240730185049.6244-4-dakr@kernel.org Link: https://lkml.kernel.org/r/20240722163111.4766-1-dakr@kernel.org Link: https://lkml.kernel.org/r/20240722163111.4766-2-dakr@kernel.org Signed-off-by: Danilo Krummrich Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Chandan Babu R Cc: Christian König Cc: Christoph Hellwig Cc: Christoph Lameter Cc: David Rientjes Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Kees Cook Cc: Marc Zyngier Cc: Michael Ellerman Cc: Miguel Ojeda Cc: Oliver Upton Cc: Pekka Enberg Cc: Roman Gushchin Cc: Uladzislau Rezki Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index e4a631ec430b..ad2ce7a6ab7a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -189,6 +189,10 @@ extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1 extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2); #define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__)) +void * __must_check vrealloc_noprof(const void *p, size_t size, gfp_t flags) + __realloc_size(2); +#define vrealloc(...) alloc_hooks(vrealloc_noprof(__VA_ARGS__)) + extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); -- cgit v1.2.3 From 590b9d576caec6b4c46bba49ed36223a399c3fc5 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 22 Jul 2024 18:29:24 +0200 Subject: mm: kvmalloc: align kvrealloc() with krealloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Besides the obvious (and desired) difference between krealloc() and kvrealloc(), there is some inconsistency in their function signatures and behavior: - krealloc() frees the memory when the requested size is zero, whereas kvrealloc() simply returns a pointer to the existing allocation. - krealloc() behaves like kmalloc() if a NULL pointer is passed, whereas kvrealloc() does not accept a NULL pointer at all and, if passed, would fault instead. - krealloc() is self-contained, whereas kvrealloc() relies on the caller to provide the size of the previous allocation. Inconsistent behavior throughout allocation APIs is error prone, hence make kvrealloc() behave like krealloc(), which seems superior in all mentioned aspects. Besides that, implementing kvrealloc() by making use of krealloc() and vrealloc() provides opportunities to grow (and shrink) allocations more efficiently.
For instance, vrealloc() can be optimized to allocate and map additional pages to grow the allocation or unmap and free unused pages to shrink the allocation. [dakr@kernel.org: document concurrency restrictions] Link: https://lkml.kernel.org/r/20240725125442.4957-1-dakr@kernel.org [dakr@kernel.org: disable KASAN when switching to vmalloc] Link: https://lkml.kernel.org/r/20240730185049.6244-2-dakr@kernel.org [dakr@kernel.org: properly document __GFP_ZERO behavior] Link: https://lkml.kernel.org/r/20240730185049.6244-5-dakr@kernel.org Link: https://lkml.kernel.org/r/20240722163111.4766-3-dakr@kernel.org Signed-off-by: Danilo Krummrich Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Chandan Babu R Cc: Christian König Cc: Christoph Hellwig Cc: Christoph Lameter Cc: David Rientjes Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Kees Cook Cc: Marc Zyngier Cc: Michael Ellerman Cc: Miguel Ojeda Cc: Oliver Upton Cc: Pekka Enberg Cc: Roman Gushchin Cc: Uladzislau Rezki Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- include/linux/slab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index eb2bf4629157..c9cb42203183 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -841,8 +841,8 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) #define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) #define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) -extern void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags) - __realloc_size(3); +void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) + __realloc_size(2); #define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) extern void kvfree(const void *addr); -- cgit v1.2.3 From d58a2a581f132529eefac5377676011562b631b8 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 22 Jul 2024 13:43:18 +0800 Subject: mm: shmem: rename shmem_is_huge() to shmem_huge_global_enabled() shmem_is_huge() is now used to check if the top-level huge page is enabled, thus rename it to reflect its usage. 
Link: https://lkml.kernel.org/r/da53296e0ab6359aa083561d9dc01e4223d60fbe.1721626645.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Ryan Roberts Acked-by: David Hildenbrand Cc: Barry Song <21cnbao@gmail.com> Cc: Hugh Dickins Cc: Lance Yang Cc: Matthew Wilcox (Oracle) Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 1d06b1e5408a..405ee8d3589a 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -111,14 +111,15 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags); +extern bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, bool shmem_huge_force, + struct mm_struct *mm, unsigned long vm_flags); unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, bool global_huge); #else -static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags) +static __always_inline bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, + bool shmem_huge_force, struct mm_struct *mm, + unsigned long vm_flags) { return false; } -- cgit v1.2.3 From 6beeab870e70b2d4f49baf6c6be9da1b61c169f8 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 22 Jul 2024 13:43:19 +0800 Subject: mm: shmem: move shmem_huge_global_enabled() into shmem_allowable_huge_orders() Move shmem_huge_global_enabled() into shmem_allowable_huge_orders(), so that shmem_allowable_huge_orders() can also help to find the allowable huge orders for tmpfs. Moreover the shmem_huge_global_enabled() can become static. While we are at it, passing the vma instead of mm for shmem_huge_global_enabled() makes code cleaner. No functional changes. 
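For illustration, a caller sketch after this change looks roughly like the following (illustrative only, mirroring the new signature in the hunk below):

	unsigned long orders;

	/* the top-level huge policy check now happens inside the helper */
	orders = shmem_allowable_huge_orders(inode, vma, index, shmem_huge_force);
	if (!orders)
		return 0;	/* no huge orders are allowed for this mapping */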
Link: https://lkml.kernel.org/r/8e825146bb29ee1a1c7bd64d2968ff3e19be7815.1721626645.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Ryan Roberts Acked-by: David Hildenbrand Cc: Barry Song <21cnbao@gmail.com> Cc: Hugh Dickins Cc: Lance Yang Cc: Matthew Wilcox (Oracle) Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 405ee8d3589a..1564d7d3ca61 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -111,21 +111,13 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags); unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool global_huge); + bool shmem_huge_force); #else -static __always_inline bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, - bool shmem_huge_force, struct mm_struct *mm, - unsigned long vm_flags) -{ - return false; -} static inline unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool global_huge) + bool shmem_huge_force) { return 0; } -- cgit v1.2.3 From 4fd568faf6e7e21f206cd66dd4fca9a72e7d922c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 18 Jul 2024 17:18:21 +0800 Subject: mm: kmem: remove mem_cgroup_from_obj() There is no user of mem_cgroup_from_obj(), remove it. Link: https://lkml.kernel.org/r/20240718091821.44740-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0e5bf25d324f..af7da7bd00af 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1717,7 +1717,6 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } -struct mem_cgroup *mem_cgroup_from_obj(void *p); struct mem_cgroup *mem_cgroup_from_slab_obj(void *p); static inline void count_objcg_event(struct obj_cgroup *objcg, @@ -1780,11 +1779,6 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) return -1; } -static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) -{ - return NULL; -} - static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) { return NULL; -- cgit v1.2.3 From 2a28713a67fd28eedc13b32300df6b9c5a2381f5 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 24 Jul 2024 09:01:14 -0400 Subject: memory tiering: introduce folio_use_access_time() check If memory tiering mode is on and a folio is not in the top tier memory, folio's cpupid field is repurposed to store page access time. Instead of an open coded check, use a function to encapsulate the check. 
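For illustration, the encapsulation amounts to roughly the following sketch (the actual helper body lives outside of 'include' and may differ in detail):

	/* previously open coded at each call site */
	bool folio_use_access_time(struct folio *folio)
	{
		return (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
		       !node_is_toptier(folio_nid(folio));
	}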
Link: https://lkml.kernel.org/r/20240724130115.793641-3-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: "Huang, Ying" Acked-by: David Hildenbrand Reviewed-by: Kefeng Wang Cc: Baolin Wang Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6549d0979b28..6c2078ca3391 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1745,6 +1745,8 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } + +bool folio_use_access_time(struct folio *folio); #else /* !CONFIG_NUMA_BALANCING */ static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { @@ -1798,6 +1800,10 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) { } +static inline bool folio_use_access_time(struct folio *folio) +{ + return false; +} #endif /* CONFIG_NUMA_BALANCING */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) -- cgit v1.2.3 From c4a6fce85640beb4f79e8217272d1ef79839cc17 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Wed, 24 Jul 2024 20:33:21 +0000 Subject: vmstat: kernel stack usage histogram As part of the dynamic kernel stack project, we need to know the amount of data that can be saved by reducing the default kernel stack size [1]. Provide a kernel stack usage histogram to aid in optimizing kernel stack sizes and minimizing memory waste in large-scale environments. The histogram divides stack usage into power-of-two buckets and reports the results in /proc/vmstat. This information is especially valuable in environments with millions of machines, where even small optimizations can have a significant impact. The histogram data is presented in /proc/vmstat with entries like "kstack_1k", "kstack_2k", and so on, indicating the number of threads that exited with stack usage falling within each respective bucket. Example outputs: Intel: $ grep kstack /proc/vmstat kstack_1k 3 kstack_2k 188 kstack_4k 11391 kstack_8k 243 kstack_16k 0 ARM with 64K page_size: $ grep kstack /proc/vmstat kstack_1k 1 kstack_2k 340 kstack_4k 25212 kstack_8k 1659 kstack_16k 0 kstack_32k 0 kstack_64k 0 Note: once the dynamic kernel stack is implemented, the usability of this feature will depend on that implementation: On hardware that supports faults on kernel stacks, we will have other metrics that show the total number of pages allocated for stacks. On hardware where faults are not supported, we will most likely have some optimization where only some threads are extended, and for those, these metrics will still be very useful.
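For illustration, the accounting boils down to bucketing an exiting task's stack usage, roughly like the following sketch (the function name and the unguarded buckets are illustrative; the real counters are conditional on THREAD_SIZE as in the hunk below):

	static void kstack_histogram(unsigned long used_stack)
	{
		if (used_stack <= 1024)
			count_vm_event(KSTACK_1K);
		else if (used_stack <= 2048)
			count_vm_event(KSTACK_2K);
		else if (used_stack <= 4096)
			count_vm_event(KSTACK_4K);
		/* ... larger power-of-two buckets elided ... */
		else
			count_vm_event(KSTACK_REST);
	}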
[1] https://lwn.net/Articles/974367 Link: https://lkml.kernel.org/r/20240730150158.832783-3-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240724203322.2765486-3-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Reviewed-by: Kent Overstreet Acked-by: Shakeel Butt Cc: Domenico Cerasuolo Cc: Li Zhijian Cc: Matthew Wilcox Cc: Nhat Pham Cc: Peter Zijlstra Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/vm_event_item.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 6fff8ed3b1fd..aae5c7c5cfb4 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -155,6 +155,30 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, VMA_LOCK_RETRY, VMA_LOCK_MISS, #endif +#ifdef CONFIG_DEBUG_STACK_USAGE + KSTACK_1K, +#if THREAD_SIZE > 1024 + KSTACK_2K, +#endif +#if THREAD_SIZE > 2048 + KSTACK_4K, +#endif +#if THREAD_SIZE > 4096 + KSTACK_8K, +#endif +#if THREAD_SIZE > 8192 + KSTACK_16K, +#endif +#if THREAD_SIZE > 16384 + KSTACK_32K, +#endif +#if THREAD_SIZE > 32768 + KSTACK_64K, +#endif +#if THREAD_SIZE > 65536 + KSTACK_REST, +#endif +#endif /* CONFIG_DEBUG_STACK_USAGE */ NR_VM_EVENT_ITEMS }; -- cgit v1.2.3 From fbe76a6557a83af5ef3819fd7b7ffd0a5d3b4e51 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Wed, 24 Jul 2024 20:33:22 +0000 Subject: task_stack: uninline stack_not_used Given that stack_not_used() is not performance critical function uninline it. Link: https://lkml.kernel.org/r/20240730150158.832783-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240724203322.2765486-4-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Acked-by: Shakeel Butt Cc: Domenico Cerasuolo Cc: Kent Overstreet Cc: Li Zhijian Cc: Matthew Wilcox Cc: Nhat Pham Cc: Peter Zijlstra Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/sched/task_stack.h | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index ccd72b978e1f..bf10bdb487dd 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -95,23 +95,11 @@ static inline int object_is_on_stack(const void *obj) extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE +unsigned long stack_not_used(struct task_struct *p); +#else static inline unsigned long stack_not_used(struct task_struct *p) { - unsigned long *n = end_of_stack(p); - - do { /* Skip over canary */ -# ifdef CONFIG_STACK_GROWSUP - n--; -# else - n++; -# endif - } while (!*n); - -# ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; -# else - return (unsigned long)n - (unsigned long)end_of_stack(p); -# endif + return 0; } #endif extern void set_task_stack_end_magic(struct task_struct *tsk); -- cgit v1.2.3 From f77bd4b14ccfd38dfcfe67eecad517b8ec1b7f37 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 26 Jul 2024 20:31:08 +0000 Subject: mm: memcg: don't call propagate_protected_usage() needlessly Patch series "mm: memcg: page counters optimizations", v3. This patchset contains 3 independent small optimizations of page counters. This patch (of 3): Memory protection (min/low) requires a constant tracking of protected memory usage. 
propagate_protected_usage() is called on each page counters update and does a number of operations even in cases when the actual memory protection functionality is not supported (e.g. hugetlb cgroups or memcg swap counters). It's obviously inefficient and leads to a waste of CPU cycles. It can be addressed by calling propagate_protected_usage() only for the counters which do support memory guarantees. As of now it's only memcg->memory - the unified memory memcg counter. Link: https://lkml.kernel.org/r/20240726203110.1577216-2-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/page_counter.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 904c52f97284..0b8e993f9163 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -31,6 +31,7 @@ struct page_counter { /* Keep all the read most fields in a separete cacheline. */ CACHELINE_PADDING(_pad2_); + bool protection_support; unsigned long min; unsigned long low; unsigned long high; @@ -44,12 +45,17 @@ struct page_counter { #define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE) #endif +/* + * Protection is supported only for the first counter (with id 0). + */ static inline void page_counter_init(struct page_counter *counter, - struct page_counter *parent) + struct page_counter *parent, + bool protection_support) { atomic_long_set(&counter->usage, 0); counter->max = PAGE_COUNTER_MAX; counter->parent = parent; + counter->protection_support = protection_support; } static inline unsigned long page_counter_read(struct page_counter *counter) -- cgit v1.2.3 From 941ce635234162c420c9185816c59f7d5dd1ad07 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 26 Jul 2024 20:31:09 +0000 Subject: mm: page_counters: put page_counter_calculate_protection() under CONFIG_MEMCG Put page_counter_calculate_protection() under CONFIG_MEMCG. The protection functionality (min/low limits) is not supported by any other cgroup subsystem, so page_counter_calculate_protection() and related static effective_protection() can be compiled out if CONFIG_MEMCG is not enabled. 
Link: https://lkml.kernel.org/r/20240726203110.1577216-3-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/page_counter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 0b8e993f9163..1b3e92c09b80 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -87,8 +87,14 @@ static inline void page_counter_reset_watermark(struct page_counter *counter) counter->watermark = page_counter_read(counter); } +#ifdef CONFIG_MEMCG void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, bool recursive_protection); +#else +static inline void page_counter_calculate_protection(struct page_counter *root, + struct page_counter *counter, + bool recursive_protection) {} +#endif #endif /* _LINUX_PAGE_COUNTER_H */ -- cgit v1.2.3 From 57979fabff554bf4d1edeeee69251b22ca9bf55e Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 26 Jul 2024 20:31:10 +0000 Subject: mm: page_counters: initialize usage using ATOMIC_LONG_INIT() macro When a page_counter structure is initialized, there is no need to use an atomic set operation to initialize the usage counter because at this point the structure is not visible to anybody else. ATOMIC_LONG_INIT() is what should be used in such cases. Link: https://lkml.kernel.org/r/20240726203110.1577216-4-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/page_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 1b3e92c09b80..66ebf9a73158 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -52,7 +52,7 @@ static inline void page_counter_init(struct page_counter *counter, struct page_counter *parent, bool protection_support) { - atomic_long_set(&counter->usage, 0); + counter->usage = (atomic_long_t)ATOMIC_LONG_INIT(0); counter->max = PAGE_COUNTER_MAX; counter->parent = parent; counter->protection_support = protection_support; -- cgit v1.2.3 From 394290cba9664ed3ab80d0e247402102a9b7287a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jul 2024 17:07:26 +0200 Subject: mm: turn USE_SPLIT_PTE_PTLOCKS / USE_SPLIT_PMD_PTLOCKS into Kconfig options Patch series "mm: split PTE/PMD PT table Kconfig cleanups+clarifications". This series is a follow up to the fixes: "[PATCH v1 0/2] mm/hugetlb: fix hugetlb vs. core-mm PT locking" When working on the fixes, I wondered why 8xx is fine (-> never uses split PT locks) and how PT locking even works properly with PMD page table sharing (-> always requires split PMD PT locks). Let's improve the split PT lock detection, make hugetlb properly depend on it and make 8xx bail out if it would ever get enabled by accident. As an alternative to patch #3 we could extend the Kconfig SPLIT_PTE_PTLOCKS option from patch #2 -- but enforcing it closer to the code that actually implements it feels a bit nicer for documentation purposes, and there is no need to actually disable it because it should always be disabled (!SMP). Did a bunch of cross-compilations to make sure that split PTE/PMD PT locks are still getting used where we would expect them.
[1] https://lkml.kernel.org/r/20240725183955.2268884-1-david@redhat.com This patch (of 3): Let's clean that up a bit and prepare for depending on CONFIG_SPLIT_PMD_PTLOCKS in other Kconfig options. More cleanups would be reasonable (like the arch-specific "depends on" for CONFIG_SPLIT_PTE_PTLOCKS), but we'll leave that for another day. Link: https://lkml.kernel.org/r/20240726150728.3159964-1-david@redhat.com Link: https://lkml.kernel.org/r/20240726150728.3159964-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Russell King (Oracle) Reviewed-by: Qi Zheng Cc: Alexander Viro Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Christian Brauner Cc: Christophe Leroy Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juergen Gross Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++---- include/linux/mm_types.h | 2 +- include/linux/mm_types_task.h | 3 --- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6c2078ca3391..b7000fa300f3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2891,7 +2891,7 @@ static inline void pagetable_free(struct ptdesc *pt) __free_pages(page, compound_order(page)); } -#if USE_SPLIT_PTE_PTLOCKS +#if defined(CONFIG_SPLIT_PTE_PTLOCKS) #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); bool ptlock_alloc(struct ptdesc *ptdesc); @@ -2949,7 +2949,7 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) return true; } -#else /* !USE_SPLIT_PTE_PTLOCKS */ +#else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. */ @@ -2964,7 +2964,7 @@ static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} -#endif /* USE_SPLIT_PTE_PTLOCKS */ +#endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) { @@ -3024,7 +3024,7 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? 
\ NULL: pte_offset_kernel(pmd, address)) -#if USE_SPLIT_PMD_PTLOCKS +#if defined(CONFIG_SPLIT_PMD_PTLOCKS) static inline struct page *pmd_pgtable_page(pmd_t *pmd) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 485424979254..165c58b12ccc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -947,7 +947,7 @@ struct mm_struct { #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_subscriptions *notifier_subscriptions; #endif -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS) pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_NUMA_BALANCING diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index a2f6179b672b..bff5706b76e1 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -16,9 +16,6 @@ #include #endif -#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) -#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ - IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) /* -- cgit v1.2.3 From 188cac58a8bcdf82c7f63275b68f7a46871e45d6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jul 2024 17:07:27 +0200 Subject: mm/hugetlb: enforce that PMD PT sharing has split PMD PT locks Sharing page tables between processes but falling back to per-MM page table locks cannot possibly work. So, let's make sure that we do have split PMD locks by adding a new Kconfig option and letting that depend on CONFIG_SPLIT_PMD_PTLOCKS. Link: https://lkml.kernel.org/r/20240726150728.3159964-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Mike Rapoport (Microsoft) Cc: Alexander Viro Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Christian Brauner Cc: Christophe Leroy Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juergen Gross Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Russell King Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 45bf05ad5c53..9b7bcfce6920 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1251,7 +1251,7 @@ static inline __init void hugetlb_cma_reserve(int order) } #endif -#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING static inline bool hugetlb_pmd_shared(pte_t *pte) { return page_count(virt_to_page(pte)) > 1; @@ -1287,8 +1287,7 @@ bool __vma_private_lock(struct vm_area_struct *vma); static inline pte_t * hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { -#if defined(CONFIG_HUGETLB_PAGE) && \ - defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP) +#if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; /* -- cgit v1.2.3 From e5a41fc77771c7c7f6b9bef85a02b38b802b7371 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 29 Jul 2024 20:38:42 +0200 Subject: mm: simplify arch_make_folio_accessible() Patch series "mm: remove arch_make_page_accessible()". Now that s390x implements arch_make_folio_accessible(), let's convert remaining users to use arch_make_folio_accessible() instead so we can remove arch_make_page_accessible(). 
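For illustration, the conversion in the remaining callers has roughly this shape (sketch only; the affected call sites are outside of 'include'):

	/* before: make each page of the folio accessible individually */
	for (i = 0; i < folio_nr_pages(folio); i++) {
		ret = arch_make_page_accessible(folio_page(folio, i));
		if (ret)
			break;
	}

	/* after: one call per folio */
	ret = arch_make_folio_accessible(folio);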
This patch (of 3): Now that s390x implements HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE, let's turn generic arch_make_folio_accessible() into a NOP: there are no other targets that implement HAVE_ARCH_MAKE_PAGE_ACCESSIBLE but not HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE. Link: https://lkml.kernel.org/r/20240729183844.388481-1-david@redhat.com Link: https://lkml.kernel.org/r/20240729183844.388481-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Claudio Imbrenda Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: David Hildenbrand Cc: Heiko Carstens Cc: Janosch Frank Cc: Sven Schnelle Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- include/linux/mm.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b7000fa300f3..fc7bd3864bcd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2215,16 +2215,7 @@ static inline int arch_make_page_accessible(struct page *page) #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE static inline int arch_make_folio_accessible(struct folio *folio) { - int ret; - long i, nr = folio_nr_pages(folio); - - for (i = 0; i < nr; i++) { - ret = arch_make_page_accessible(folio_page(folio, i)); - if (ret) - break; - } - - return ret; + return 0; } #endif -- cgit v1.2.3 From 3290ef3c7f2a8171fc534e02fb95a512eeae689a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 29 Jul 2024 20:38:44 +0200 Subject: s390/uv: drop arch_make_page_accessible() All code was converted to using arch_make_folio_accessible(), let's drop arch_make_page_accessible(). Link: https://lkml.kernel.org/r/20240729183844.388481-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Claudio Imbrenda Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Janosch Frank Cc: Sven Schnelle Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index fc7bd3864bcd..153a4662a0cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2205,13 +2205,6 @@ static inline bool folio_likely_mapped_shared(struct folio *folio) return atomic_read(&folio->_mapcount) > 0; } -#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -static inline int arch_make_page_accessible(struct page *page) -{ - return 0; -} -#endif - #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE static inline int arch_make_folio_accessible(struct folio *folio) { -- cgit v1.2.3 From c6f53ed8f213a66ae8bc40aa9112c32412c35a21 Mon Sep 17 00:00:00 2001 From: David Finkel Date: Mon, 29 Jul 2024 10:37:42 -0400 Subject: mm, memcg: cg2 memory{.swap,}.peak write handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm, memcg: cg2 memory{.swap,}.peak write handlers", v7. This patch (of 2): Other mechanisms for querying the peak memory usage of either a process or v1 memory cgroup allow for resetting the high watermark. Restore parity with those mechanisms, but with a less racy API. For example: - Any write to memory.max_usage_in_bytes in a cgroup v1 mount resets the high watermark. - writing "5" to the clear_refs pseudo-file in a processes's proc directory resets the peak RSS. 
This change is an evolution of a previous patch, which mostly copied the cgroup v1 behavior; however, there were concerns about races/ownership issues with a global reset, so instead this change makes the reset file-descriptor-local. Writing any non-empty string to the memory.peak and memory.swap.peak pseudo-files resets the high watermark to the current usage for subsequent reads through that same FD. Notably, following Johannes's suggestion, this implementation moves the O(FDs that have written) behavior onto the FD write(2) path. Instead, on the page-allocation path, we simply add one additional watermark to conditionally bump per-hierarchy level in the page-counter. Additionally, this takes Longman's suggestion of nesting the page-charging-path checks for the two watermarks to reduce the number of common-case comparisons. This behavior is particularly useful for work scheduling systems that need to track memory usage of worker processes/cgroups per-work-item. Since memory can't be squeezed like CPU can (the OOM-killer has opinions), these systems need to track the peak memory usage to compute system/container fullness when binpacking workitems. Most notably, Vimeo's use-case involves a system that's doing global binpacking across many Kubernetes pods/containers, and while we can use PSI for some local decisions about overload, we strive to avoid packing workloads too tightly in the first place. To facilitate this, we track the peak memory usage. However, since we run with long-lived workers (to amortize startup costs), we need a way to track the high watermark while a work-item is executing. Polling runs the risk of missing short spikes that last for timescales below the polling interval, and peak memory tracking at the cgroup level is otherwise perfect for this use-case. As this data is used to ensure that binpacked work ends up with sufficient headroom, this use-case mostly avoids the inaccuracies surrounding reclaimable memory.
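For illustration, the intended usage from a long-lived worker or scheduler looks roughly like this userspace sketch (cgroup path and error handling are illustrative only):

	#include <fcntl.h>
	#include <unistd.h>

	char buf[64] = "";
	int fd = open("/sys/fs/cgroup/worker/memory.peak", O_RDWR);

	write(fd, "reset", 5);			/* any non-empty string resets this FD's watermark */
	/* ... run one work item inside the cgroup ... */
	pread(fd, buf, sizeof(buf) - 1, 0);	/* peak usage observed since the reset above */

A reset through one FD does not disturb the watermark seen through other FDs or the global watermark.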
Link: https://lkml.kernel.org/r/20240730231304.761942-1-davidf@vimeo.com Link: https://lkml.kernel.org/r/20240729143743.34236-1-davidf@vimeo.com Link: https://lkml.kernel.org/r/20240729143743.34236-2-davidf@vimeo.com Signed-off-by: David Finkel Suggested-by: Johannes Weiner Suggested-by: Waiman Long Acked-by: Johannes Weiner Reviewed-by: Michal Koutný Acked-by: Tejun Heo Reviewed-by: Roman Gushchin Cc: Jonathan Corbet Cc: Michal Hocko Cc: Muchun Song Cc: Shakeel Butt Cc: Shuah Khan Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/cgroup-defs.h | 5 +++++ include/linux/cgroup.h | 3 +++ include/linux/memcontrol.h | 5 +++++ include/linux/page_counter.h | 11 ++++++++++- 4 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ae04035b6cbe..7fc2d0195f56 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -775,6 +775,11 @@ struct cgroup_subsys { extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; +struct cgroup_of_peak { + unsigned long value; + struct list_head list; +}; + /** * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups * @tsk: target task diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c60ba0ab1462..3e0563753cc3 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -854,4 +855,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {} struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); +struct cgroup_of_peak *of_peak(struct kernfs_open_file *of); + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index af7da7bd00af..1b79760af685 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -193,6 +193,11 @@ struct mem_cgroup { struct page_counter memsw; /* v1 only */ }; + /* registered local peak watchers */ + struct list_head memory_peaks; + struct list_head swap_peaks; + spinlock_t peaks_lock; + /* Range enforcement for interrupt charges */ struct work_struct high_work; diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 66ebf9a73158..79dbd8bc35a7 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -26,6 +26,8 @@ struct page_counter { atomic_long_t children_low_usage; unsigned long watermark; + /* Latest cg2 reset watermark */ + unsigned long local_watermark; unsigned long failcnt; /* Keep all the read most fields in a separete cacheline. */ @@ -84,7 +86,14 @@ int page_counter_memparse(const char *buf, const char *max, static inline void page_counter_reset_watermark(struct page_counter *counter) { - counter->watermark = page_counter_read(counter); + unsigned long usage = page_counter_read(counter); + + /* + * Update local_watermark first, so it's always <= watermark + * (modulo CPU/compiler re-ordering) + */ + counter->local_watermark = usage; + counter->watermark = usage; } #ifdef CONFIG_MEMCG -- cgit v1.2.3 From a17c7d8fd2b097a422fc892b660e879e19c20214 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 29 Jul 2024 12:50:35 +0100 Subject: userfaultfd: move core VMA manipulation logic to mm/userfaultfd.c Patch series "Make core VMA operations internal and testable", v4. There are a number of "core" VMA manipulation functions implemented in mm/mmap.c, notably those concerning VMA merging, splitting, modifying, expanding and shrinking, which logically don't belong there. 
More importantly this functionality represents an internal implementation detail of memory management and should not be exposed outside of mm/ itself. This patch series isolates core VMA manipulation functionality into its own file, mm/vma.c, and provides an API to the rest of the mm code in mm/vma.h. Importantly, it also carefully implements mm/vma_internal.h, which specifies which headers need to be imported by vma.c, leading to the very useful property that vma.c depends only on mm/vma.h and mm/vma_internal.h. This means we can then re-implement vma_internal.h in userland, adding shims for kernel mechanisms as required, allowing us to unit test internal VMA functionality. This testing is useful as opposed to an e.g. kunit implementation as this way we can avoid all external kernel side-effects while testing, run tests VERY quickly, and iterate on and debug problems quickly. Excitingly this opens the door to, in the future, recreating precise problems observed in production in userland and very quickly debugging problems that might otherwise be very difficult to reproduce. This patch series takes advantage of existing shim logic and full userland maple tree support contained in tools/testing/radix-tree/ and tools/include/linux/, separating out shared components of the radix tree implementation to provide this testing. Kernel functionality is stubbed and shimmed as needed in tools/testing/vma/ which contains a fully functional userland vma_internal.h file and which imports mm/vma.c and mm/vma.h to be directly tested from userland. A simple, skeleton testing implementation is provided in tools/testing/vma/vma.c as a proof-of-concept, asserting that simple VMA merge, modify (testing split), expand and shrink functionality work correctly. This patch (of 4): This patch forms part of a patch series intending to separate out VMA logic and render it testable from userspace, which requires that core manipulation functions be exposed in an mm/-internal header file. In order to do this, we must abstract APIs we wish to test, in this instance functions which ultimately invoke vma_modify(). This patch therefore moves all logic which ultimately invokes vma_modify() to mm/userfaultfd.c, trying to transfer code at a functional granularity where possible. [lorenzo.stoakes@oracle.com: fix user-after-free in userfaultfd_clear_vma()] Link: https://lkml.kernel.org/r/3c947ddc-b804-49b7-8fe9-3ea3ca13def5@lucifer.local Link: https://lkml.kernel.org/r/cover.1722251717.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/50c3ed995fd81c45876c86304c8a00bf3e396cfd.1722251717.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Alexander Viro Cc: Brendan Higgins Cc: Christian Brauner Cc: David Gow Cc: Eric W. 
Biederman Cc: Jan Kara Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Rae Moar Cc: SeongJae Park Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Pengfei Xu Signed-off-by: Andrew Morton --- include/linux/userfaultfd_k.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index a12bcf042551..9fc6ce15c499 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -267,6 +267,25 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm, extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); extern bool userfaultfd_wp_async(struct vm_area_struct *vma); +void userfaultfd_reset_ctx(struct vm_area_struct *vma); + +struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end); + +int userfaultfd_register_range(struct userfaultfd_ctx *ctx, + struct vm_area_struct *vma, + unsigned long vm_flags, + unsigned long start, unsigned long end, + bool wp_async); + +void userfaultfd_release_new(struct userfaultfd_ctx *ctx); + +void userfaultfd_release_all(struct mm_struct *mm, + struct userfaultfd_ctx *ctx); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ -- cgit v1.2.3 From fa04c08f3ce649b47781c7663e92398197f5b551 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 29 Jul 2024 12:50:36 +0100 Subject: mm: move vma_modify() and helpers to internal header These are core VMA manipulation functions which invoke VMA splitting and merging and should not be directly accessed from outside of mm/. Link: https://lkml.kernel.org/r/5efde0c6342a8860d5ffc90b415f3989fd8ed0b2.1722251717.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Alexander Viro Cc: Brendan Higgins Cc: Christian Brauner Cc: David Gow Cc: Eric W. Biederman Cc: Jan Kara Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Rae Moar Cc: SeongJae Park Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Pengfei Xu Signed-off-by: Andrew Morton --- include/linux/mm.h | 60 ------------------------------------------------------ 1 file changed, 60 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 153a4662a0cd..c4054e68dc09 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3279,66 +3279,6 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); -struct vm_area_struct *vma_modify(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - unsigned long vm_flags, - struct mempolicy *policy, - struct vm_userfaultfd_ctx uffd_ctx, - struct anon_vma_name *anon_name); - -/* We are about to modify the VMA's flags. */ -static inline struct vm_area_struct -*vma_modify_flags(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - unsigned long new_flags) -{ - return vma_modify(vmi, prev, vma, start, end, new_flags, - vma_policy(vma), vma->vm_userfaultfd_ctx, - anon_vma_name(vma)); -} - -/* We are about to modify the VMA's flags and/or anon_name. 
*/ -static inline struct vm_area_struct -*vma_modify_flags_name(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - unsigned long new_flags, - struct anon_vma_name *new_name) -{ - return vma_modify(vmi, prev, vma, start, end, new_flags, - vma_policy(vma), vma->vm_userfaultfd_ctx, new_name); -} - -/* We are about to modify the VMA's memory policy. */ -static inline struct vm_area_struct -*vma_modify_policy(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct mempolicy *new_pol) -{ - return vma_modify(vmi, prev, vma, start, end, vma->vm_flags, - new_pol, vma->vm_userfaultfd_ctx, anon_vma_name(vma)); -} - -/* We are about to modify the VMA's flags and/or uffd context. */ -static inline struct vm_area_struct -*vma_modify_flags_uffd(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - unsigned long new_flags, - struct vm_userfaultfd_ctx new_ctx) -{ - return vma_modify(vmi, prev, vma, start, end, new_flags, - vma_policy(vma), new_ctx, anon_vma_name(vma)); -} static inline int check_data_rlimit(unsigned long rlim, unsigned long new, -- cgit v1.2.3 From d61f0d59683d9c899211c300254d4140c482a6c0 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 29 Jul 2024 12:50:37 +0100 Subject: mm: move vma_shrink(), vma_expand() to internal header The vma_shrink() and vma_expand() functions are internal VMA manipulation functions which we ought to abstract for use outside of memory management code. To achieve this, we replace shift_arg_pages() in fs/exec.c with an invocation of a new relocate_vma_down() function implemented in mm/mmap.c, which enables us to also move move_page_tables() and vma_iter_prev_range() to internal.h. The purpose of doing this is to isolate key VMA manipulation functions in order that we can both abstract them and later render them easily testable. Link: https://lkml.kernel.org/r/3cfcd9ec433e032a85f636fdc0d7d98fafbd19c5.1722251717.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Alexander Viro Cc: Brendan Higgins Cc: Christian Brauner Cc: David Gow Cc: Eric W. Biederman Cc: Jan Kara Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Rae Moar Cc: SeongJae Park Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Pengfei Xu Signed-off-by: Andrew Morton --- include/linux/mm.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4054e68dc09..1c0d88446eb8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1005,12 +1005,6 @@ static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) return mas_prev(&vmi->mas, 0); } -static inline -struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) -{ - return mas_prev_range(&vmi->mas, 0); -} - static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) { return vmi->mas.index; @@ -2520,11 +2514,6 @@ int set_page_dirty_lock(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); -extern unsigned long move_page_tables(struct vm_area_struct *vma, - unsigned long old_addr, struct vm_area_struct *new_vma, - unsigned long new_addr, unsigned long len, - bool need_rmap_locks, bool for_stack); - /* * Flags used by change_protection(). 
For now we make it a bitmap so * that we can pass in multiple flags just like parameters. However @@ -3267,11 +3256,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, pgoff_t pgoff, - struct vm_area_struct *next); -extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, pgoff_t pgoff); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); @@ -3279,6 +3263,7 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); +int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); static inline int check_data_rlimit(unsigned long rlim, unsigned long new, -- cgit v1.2.3 From 49b1b8d6f6831026cb105b0eafa18f13db612d86 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 29 Jul 2024 12:50:38 +0100 Subject: mm: move internal core VMA manipulation functions to own file This patch introduces vma.c and moves internal core VMA manipulation functions to this file from mmap.c. This allows us to isolate VMA functionality in a single place such that we can create userspace testing code that invokes this functionality in an environment where we can implement simple unit tests of core functionality. This patch ensures that core VMA functionality is explicitly marked as such by its presence in mm/vma.h. It also places the header includes required by vma.c in vma_internal.h, which is simply imported by vma.c. This makes the VMA functionality testable, as userland testing code can simply stub out functionality as required. Link: https://lkml.kernel.org/r/c77a6aafb4c42aaadb8e7271a853658cbdca2e22.1722251717.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Alexander Viro Cc: Brendan Higgins Cc: Christian Brauner Cc: David Gow Cc: Eric W. 
Biederman Cc: Jan Kara Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Rae Moar Cc: SeongJae Park Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Pengfei Xu Signed-off-by: Andrew Morton --- include/linux/mm.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1c0d88446eb8..bd219ac9c026 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1005,21 +1005,6 @@ static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) return mas_prev(&vmi->mas, 0); } -static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) -{ - return vmi->mas.index; -} - -static inline unsigned long vma_iter_end(struct vma_iterator *vmi) -{ - return vmi->mas.last + 1; -} -static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi, - unsigned long count) -{ - return mas_expected_entries(&vmi->mas, count); -} - static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, unsigned long start, unsigned long end, gfp_t gfp) { @@ -2534,21 +2519,6 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen); #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ MM_CP_UFFD_WP_RESOLVE) -bool vma_needs_dirty_tracking(struct vm_area_struct *vma); -bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); -static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) -{ - /* - * We want to check manually if we can change individual PTEs writable - * if we can't do that automatically for all PTEs in a mapping. For - * private mappings, that's always the case when we have write - * permissions as we properly have to handle COW. - */ - if (vma->vm_flags & VM_SHARED) - return vma_wants_writenotify(vma, vma->vm_page_prot); - return !!(vma->vm_flags & VM_WRITE); - -} bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte); extern long change_protection(struct mmu_gather *tlb, @@ -3256,12 +3226,7 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void unlink_file_vma(struct vm_area_struct *); -extern struct vm_area_struct *copy_vma(struct vm_area_struct **, - unsigned long addr, unsigned long len, pgoff_t pgoff, - bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); -- cgit v1.2.3 From 29943248af0a8ac3edb808564baa417b40e35521 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 29 Jul 2024 14:47:17 +0530 Subject: mm: improve code consistency with zonelist_* helper functions Replace direct access to zoneref->zone, zoneref->zone_idx, or zone_to_nid(zoneref->zone) with the corresponding zonelist_* helper functions for consistency. No functional change. 
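For illustration, the preferred style after this change is roughly the following sketch, mirroring the hunks below:

	struct zoneref *z = first_zones_zonelist(zonelist, highest_zoneidx, nodemask);

	/* use the accessors rather than poking at zoneref fields directly */
	struct zone *zone = zonelist_zone(z);
	int nid = zonelist_node_idx(z);
	enum zone_type idx = zonelist_zone_idx(z);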
Link: https://lkml.kernel.org/r/20240729091717.464-1-shivankg@amd.com Co-developed-by: Shivank Garg Signed-off-by: Shivank Garg Signed-off-by: Wei Yang Acked-by: David Hildenbrand Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 4 ++-- include/trace/events/oom.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1dc6248feb83..dbb69f96a152 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1688,7 +1688,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, zone = zonelist_zone(z)) #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \ - for (zone = z->zone; \ + for (zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) @@ -1724,7 +1724,7 @@ static inline bool movable_only_nodes(nodemask_t *nodes) nid = first_node(*nodes); zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK]; z = first_zones_zonelist(zonelist, ZONE_NORMAL, nodes); - return (!z->zone) ? true : false; + return (!zonelist_zone(z)) ? true : false; } diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index a42be4c8563b..9f0a5d1482c4 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -55,8 +55,8 @@ TRACE_EVENT(reclaim_retry_zone, ), TP_fast_assign( - __entry->node = zone_to_nid(zoneref->zone); - __entry->zone_idx = zoneref->zone_idx; + __entry->node = zonelist_node_idx(zoneref); + __entry->zone_idx = zonelist_zone_idx(zoneref); __entry->order = order; __entry->reclaimable = reclaimable; __entry->available = available; -- cgit v1.2.3 From 9f101bef408a3f70c44b6e4de44d3d4e2655ed10 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 30 Jul 2024 19:13:39 +1200 Subject: mm: swap: add nr argument in swapcache_prepare and swapcache_clear to support large folios Right now, swapcache_prepare() and swapcache_clear() supports one entry only, to support large folios, we need to handle multiple swap entries. To optimize stack usage, we iterate twice in __swap_duplicate(): the first time to verify that all entries are valid, and the second time to apply the modifications to the entries. Currently, we're using nr=1 for the existing users. 
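For illustration, a caller handling a large folio now looks roughly like this sketch (error handling elided; swapcache_clear() gains a matching nr parameter in the mm-internal headers, per the subject line):

	int nr = folio_nr_pages(folio);

	/* reserve all of the folio's swap entries in one call */
	if (swapcache_prepare(entry, nr)) {
		/* at least one entry is not usable; fall back or retry */
	}

	/* ... and release them together when done */
	swapcache_clear(si, entry, nr);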
[v-songbaohua@oppo.com: clarify swap_count_continued and improve readability for __swap_duplicate] Link: https://lkml.kernel.org/r/20240802071817.47081-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240730071339.107447-2-21cnbao@gmail.com Signed-off-by: Barry Song Reviewed-by: Baolin Wang Acked-by: David Hildenbrand Tested-by: Baolin Wang Cc: Chris Li Cc: Gao Xiang Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kairui Song Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Nhat Pham Cc: Ryan Roberts Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Yang Shi Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index ba7ea95d1c57..5b920fa2315b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -480,7 +480,7 @@ extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); -extern int swapcache_prepare(swp_entry_t); +extern int swapcache_prepare(swp_entry_t entry, int nr); extern void swap_free_nr(swp_entry_t entry, int nr_pages); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); @@ -554,7 +554,7 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } -static inline int swapcache_prepare(swp_entry_t swp) +static inline int swapcache_prepare(swp_entry_t swp, int nr) { return 0; } -- cgit v1.2.3 From 94ccd21e9a5f41585bd16cc84dab2afa6cc21149 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 31 Jul 2024 16:20:00 +0200 Subject: mm/hugetlb: remove hugetlb_follow_page_mask() leftover We removed hugetlb_follow_page_mask() in commit 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code") but forgot to cleanup some leftovers. While at it, simplify the hugetlb comment, it's overly detailed and rather confusing. Stating that we may end up in there during coredumping is sufficient to explain the PF_DUMPCORE usage. 
Link: https://lkml.kernel.org/r/20240731142000.625044-1-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Peter Xu Cc: Muchun Song Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 9b7bcfce6920..3100a52ceb73 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -127,9 +127,6 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, unsigned long len); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); -struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, - unsigned long address, unsigned int flags, - unsigned int *page_mask); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); -- cgit v1.2.3 From 17d5f38b33b66210c5a46af639c47fddfe1c5b4d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 31 Jul 2024 18:07:58 +0200 Subject: mm: clarify folio_likely_mapped_shared() documentation for KSM folios For KSM folios, the function actually does what it is supposed to do: even having multiple mappings inside the same MM is considered "sharing", as there is no real relationship between these KSM page mappings -- in contrast to mapping the same file range twice and having the same pagecache page mapped twice. Link: https://lkml.kernel.org/r/20240731160758.808925-1-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/mm.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bd219ac9c026..2f6c08b53e4f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2132,14 +2132,19 @@ static inline size_t folio_size(const struct folio *folio) * MM ("mapped shared"), or if the folio is only mapped into a single MM * ("mapped exclusively"). * + * For KSM folios, this function also returns "mapped shared" when a folio is + * mapped multiple times into the same MM, because the individual page mappings + * are independent. + * * As precise information is not easily available for all folios, this function * estimates the number of MMs ("sharers") that are currently mapping a folio * using the number of times the first page of the folio is currently mapped * into page tables. * - * For small anonymous folios (except KSM folios) and anonymous hugetlb folios, - * the return value will be exactly correct, because they can only be mapped - * at most once into an MM, and they cannot be partially mapped. + * For small anonymous folios and anonymous hugetlb folios, the return + * value will be exactly correct: non-KSM folios can only be mapped at most once + * into an MM, and they cannot be partially mapped. KSM folios are + * considered shared even if mapped multiple times into the same MM. * * For other folios, the result can be fuzzy: * #. For partially-mappable large folios (THP), the return value can wrongly @@ -2148,9 +2153,6 @@ static inline size_t folio_size(const struct folio *folio) * #. For pagecache folios (including hugetlb), the return value can wrongly * indicate "mapped shared" (false positive) when two VMAs in the same MM * cover the same file range. - * #. 
For (small) KSM folios, the return value can wrongly indicate "mapped - * shared" (false positive), when the folio is mapped multiple times into - * the same MM. * * Further, this function only considers current page table mappings that * are tracked using the folio mapcount(s). -- cgit v1.2.3 From 03790c51a475c46647079e67e2460a149938bfd6 Mon Sep 17 00:00:00 2001 From: Kaiyang Zhao Date: Thu, 1 Aug 2024 23:25:47 +0000 Subject: mm: create promo_wmark_pages and clean up open-coded sites Patch series "mm: print the promo watermark in zoneinfo", v2. This patch (of 2): Define promo_wmark_pages and convert current call sites of wmark_pages with fixed WMARK_PROMO to using it instead. Link: https://lkml.kernel.org/r/20240801232548.36604-1-kaiyang2@cs.cmu.edu Link: https://lkml.kernel.org/r/20240801232548.36604-2-kaiyang2@cs.cmu.edu Signed-off-by: Kaiyang Zhao Cc: Johannes Weiner Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index dbb69f96a152..9f3589f7f05c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -669,6 +669,7 @@ enum zone_watermarks { #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) +#define promo_wmark_pages(z) (z->_watermark[WMARK_PROMO] + z->watermark_boost) #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) /* -- cgit v1.2.3 From 620943d7ee69070df8844235d58843af48ac70e2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 1 Aug 2024 16:50:05 -0700 Subject: include/linux/mmzone.h: clean up watermark accessors - we have a helper wmark_pages(). Teach min_wmark_pages(), low_wmark_pages(), high_wmark_pages() and promo_wmark_pages() to use it instead of open-coding its implementation. - there's no reason to implement all these things as macros. Redo them in C. Acked-by: Johannes Weiner Cc: Kaiyang Zhao Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9f3589f7f05c..17506e4a2835 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -666,12 +666,6 @@ enum zone_watermarks { #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1)) #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP) -#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) -#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) -#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) -#define promo_wmark_pages(z) (z->_watermark[WMARK_PROMO] + z->watermark_boost) -#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) - /* * Flags used in pcp->flags field. * @@ -1017,6 +1011,32 @@ enum zone_flags { ZONE_BELOW_HIGH, /* zone is below high watermark. 
*/ }; +static inline unsigned long wmark_pages(const struct zone *z, + enum zone_watermarks w) +{ + return z->_watermark[w] + z->watermark_boost; +} + +static inline unsigned long min_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_MIN); +} + +static inline unsigned long low_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_LOW); +} + +static inline unsigned long high_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_HIGH); +} + +static inline unsigned long promo_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_PROMO); +} + static inline unsigned long zone_managed_pages(struct zone *zone) { return (unsigned long)atomic_long_read(&zone->managed_pages); -- cgit v1.2.3 From aa39ca6940f1a0540f2984051b3089972f42959b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Aug 2024 17:55:15 +0200 Subject: mm/pagewalk: introduce folio_walk_start() + folio_walk_end() We want to get rid of follow_page(), and have a more reasonable way to just lookup a folio mapped at a certain address, perform some checks while still under PTL, and then only conditionally grab a folio reference if really required. Further, we might want to get rid of some walk_page_range*() users that really only want to temporarily lookup a single folio at a single address. So let's add a new page table walker that does exactly that, similarly to GUP also being able to walk hugetlb VMAs. Add folio_walk_end() as a macro for now: the compiler is not easy to please with the pte_unmap()->kunmap_local(). Note that one difference between follow_page() and get_user_pages(1) is that follow_page() will not trigger faults to get something mapped. So folio_walk is at least currently not a replacement for get_user_pages(1), but could likely be extended/reused to achieve something similar in the future. Link: https://lkml.kernel.org/r/20240802155524.517137-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Claudio Imbrenda Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Janosch Frank Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Ryan Roberts Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pagewalk.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 27cd1e59ccf7..f5eb5a32aeed 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -130,4 +130,62 @@ int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, pgoff_t nr, const struct mm_walk_ops *ops, void *private); +typedef int __bitwise folio_walk_flags_t; + +/* + * Walk migration entries as well. Careful: a large folio might get split + * concurrently. + */ +#define FW_MIGRATION ((__force folio_walk_flags_t)BIT(0)) + +/* Walk shared zeropages (small + huge) as well. */ +#define FW_ZEROPAGE ((__force folio_walk_flags_t)BIT(1)) + +enum folio_walk_level { + FW_LEVEL_PTE, + FW_LEVEL_PMD, + FW_LEVEL_PUD, +}; + +/** + * struct folio_walk - folio_walk_start() / folio_walk_end() data + * @page: exact folio page referenced (if applicable) + * @level: page table level identifying the entry type + * @pte: pointer to the page table entry (FW_LEVEL_PTE). + * @pmd: pointer to the page table entry (FW_LEVEL_PMD). + * @pud: pointer to the page table entry (FW_LEVEL_PUD). + * @ptl: pointer to the page table lock. 
+ * + * (see folio_walk_start() documentation for more details) + */ +struct folio_walk { + /* public */ + struct page *page; + enum folio_walk_level level; + union { + pte_t *ptep; + pud_t *pudp; + pmd_t *pmdp; + }; + union { + pte_t pte; + pud_t pud; + pmd_t pmd; + }; + /* private */ + struct vm_area_struct *vma; + spinlock_t *ptl; +}; + +struct folio *folio_walk_start(struct folio_walk *fw, + struct vm_area_struct *vma, unsigned long addr, + folio_walk_flags_t flags); + +#define folio_walk_end(__fw, __vma) do { \ + spin_unlock((__fw)->ptl); \ + if (likely((__fw)->level == FW_LEVEL_PTE)) \ + pte_unmap((__fw)->ptep); \ + vma_pgtable_walk_end(__vma); \ +} while (0) + #endif /* _LINUX_PAGEWALK_H */ -- cgit v1.2.3 From 8710f6ed34e7bcf98971d65acfb5eaed5fc469b0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Aug 2024 17:55:20 +0200 Subject: mm/huge_memory: convert split_huge_pages_pid() from follow_page() to folio_walk Let's remove yet another follow_page() user. Note that we have to do the split without holding the PTL, after folio_walk_end(). We don't care about losing the secretmem check in follow_page(). [david@redhat.com: teach can_split_folio() that we are not holding an additional reference] Link: https://lkml.kernel.org/r/c75d1c6c-8ea6-424f-853c-1ccda6c77ba2@redhat.com Link: https://lkml.kernel.org/r/20240802155524.517137-8-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Claudio Imbrenda Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Janosch Frank Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e25d9ebfdf89..ce44caa40eed 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -314,7 +314,7 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); -bool can_split_folio(struct folio *folio, int *pextra_pins); +bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins); int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); static inline int split_huge_page(struct page *page) @@ -470,7 +470,7 @@ thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, } static inline bool -can_split_folio(struct folio *folio, int *pextra_pins) +can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins) { return false; } -- cgit v1.2.3 From 7290840de65e04c1fc59dab692468fc9600c6038 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Aug 2024 17:55:23 +0200 Subject: mm: remove follow_page() All users are gone, let's remove it and any leftovers in comments. We'll leave any FOLL/follow_page_() naming cleanups as future work. 
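A minimal sketch of the folio_walk pattern that such conversions use (the wrapper is hypothetical and assumes the caller already holds the mmap lock; folio_walk_start()/folio_walk_end() are the interfaces added earlier in this series):

	/* Sketch: look up the folio mapped at addr, take a reference, drop the PTL. */
	static struct folio *lookup_mapped_folio(struct vm_area_struct *vma,
						 unsigned long addr)
	{
		struct folio_walk fw;
		struct folio *folio;

		folio = folio_walk_start(&fw, vma, addr, 0);
		if (!folio)
			return NULL;
		/* Still under the page table lock here: do checks, then pin. */
		folio_get(folio);
		folio_walk_end(&fw, vma);
		return folio;
	}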
Link: https://lkml.kernel.org/r/20240802155524.517137-11-david@redhat.com Signed-off-by: David Hildenbrand Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Claudio Imbrenda Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Janosch Frank Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Ryan Roberts Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f6c08b53e4f..ee8cea73d415 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3527,9 +3527,6 @@ static inline vm_fault_t vmf_fs_error(int err) return VM_FAULT_SIGBUS; } -struct page *follow_page(struct vm_area_struct *vma, unsigned long address, - unsigned int foll_flags); - static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) { if (vm_fault & VM_FAULT_OOM) -- cgit v1.2.3 From e31c38e037621c445bb4393fd77e0a76e6e0899a Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Mon, 5 Aug 2024 16:22:42 -0700 Subject: zswap: implement a second chance algorithm for dynamic zswap shrinker Patch series "improving dynamic zswap shrinker protection scheme", v3. When experimenting with the memory-pressure based (i.e "dynamic") zswap shrinker in production, we observed a sharp increase in the number of swapins, which led to performance regression. We were able to trace this regression to the following problems with the shrinker's warm pages protection scheme: 1. The protection decays way too rapidly, and the decaying is coupled with zswap stores, leading to anomalous patterns, in which a small batch of zswap stores effectively erase all the protection in place for the warmer pages in the zswap LRU. This observation has also been corroborated upstream by Takero Funaki (in [1]). 2. We inaccurately track the number of swapped in pages, missing the non-pivot pages that are part of the readahead window, while counting the pages that are found in the zswap pool. To alleviate these two issues, this patch series improve the dynamic zswap shrinker in the following manner: 1. Replace the protection size tracking scheme with a second chance algorithm. This new scheme removes the need for haphazard stats decaying, and automatically adjusts the pace of pages aging with memory pressure, and writeback rate with pool activities: slowing down when the pool is dominated with zswpouts, and speeding up when the pool is dominated with stale entries. 2. Fix the tracking of the number of swapins to take into account non-pivot pages in the readahead window. With these two changes in place, in a kernel-building benchmark without any cold data added, the number of swapins is reduced by 64.12%. This translate to a 10.32% reduction in build time. We also observe a 3% reduction in kernel CPU time. In another benchmark, with cold data added (to gauge the new algorithm's ability to offload cold data), the new second chance scheme outperforms the old protection scheme by around 0.7%, and actually written back around 21% more pages to backing swap device. So the new scheme is just as good, if not even better than the old scheme on this front as well. [1]: https://lore.kernel.org/linux-mm/CAPpodddcGsK=0Xczfuk8usgZ47xeyf4ZjiofdT+ujiyz6V2pFQ@mail.gmail.com/ This patch (of 2): Current zswap shrinker's heuristics to prevent overshrinking is brittle and inaccurate, specifically in the way we decay the protection size (i.e making pages in the zswap LRU eligible for reclaim). 
We currently decay protection aggressively in zswap_lru_add() calls. This leads to the following unfortunate effect: when a new batch of pages enter zswap, the protection size rapidly decays to below 25% of the zswap LRU size, which is way too low. We have observed this effect in production, when experimenting with the zswap shrinker: the rate of shrinking shoots up massively right after a new batch of zswap stores. This is somewhat the opposite of what we want originally - when new pages enter zswap, we want to protect both these new pages AND the pages that are already protected in the zswap LRU. Replace the existing heuristics with a second chance algorithm: 1. When a new zswap entry is stored in the zswap pool, its referenced bit is set. 2. When the zswap shrinker encounters a zswap entry with the referenced bit set, give it a second chance - only flip the referenced bit and rotate it in the LRU. 3. If the shrinker encounters the entry again, this time with its referenced bit unset, then it can reclaim the entry. In this manner, the aging of the pages in the zswap LRUs is decoupled from zswap stores, and picks up the pace with increasing memory pressure (which is what we want). The second chance scheme allows us to modulate the writeback rate based on recent pool activities. Entries that recently entered the pool will be protected, so if the pool is dominated by such entries the writeback rate will reduce proportionally, protecting the workload's workingset. On the other hand, stale entries will be written back quickly, which increases the effective writeback rate. The referenced bit is added at the hole after the `length` field of struct zswap_entry, so there is no extra space overhead for this algorithm. We will still maintain the count of swapins, which is consumed and subtracted from the lru size in zswap_shrinker_count(), to further penalize past overshrinking that led to disk swapins. The idea is that had we considered this many more pages in the LRU active/protected, they would not have been written back and we would not have had to swap them in. To test the new heuristics, I built the kernel under a cgroup with memory.max set to 2G, on a host with 36 cores: With the old shrinker: real: 263.89s user: 4318.11s sys: 673.29s swapins: 227300.5 With the second chance algorithm: real: 244.85s user: 4327.22s sys: 664.39s swapins: 94663 (average over 5 runs) We observe a 1.3% reduction in kernel CPU usage, and around 7.2% reduction in real time. Note that the number of swapped in pages dropped by 58%. [nphamcs@gmail.com: fix a small mistake in the referenced bit documentation] Link: https://lkml.kernel.org/r/20240806003403.3142387-1-nphamcs@gmail.com Link: https://lkml.kernel.org/r/20240805232243.2896283-1-nphamcs@gmail.com Link: https://lkml.kernel.org/r/20240805232243.2896283-2-nphamcs@gmail.com Signed-off-by: Nhat Pham Suggested-by: Johannes Weiner Acked-by: Yosry Ahmed Cc: Chengming Zhou Cc: Shakeel Butt Cc: Takero Funaki Signed-off-by: Andrew Morton --- include/linux/zswap.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 6cecb4a4f68b..9cd1beef0654 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -13,17 +13,15 @@ extern atomic_t zswap_stored_pages; struct zswap_lruvec_state { /* - * Number of pages in zswap that should be protected from the shrinker.
- * This number is an estimate of the following counts: + * Number of swapped in pages from disk, i.e not found in the zswap pool. * - * a) Recent page faults. - * b) Recent insertion to the zswap LRU. This includes new zswap stores, - * as well as recent zswap LRU rotations. - * - * These pages are likely to be warm, and might incur IO if the are written - * to swap. + * This is consumed and subtracted from the lru size in + * zswap_shrinker_count() to penalize past overshrinking that led to disk + * swapins. The idea is that had we considered this many more pages in the + * LRU active/protected and not written them back, we would not have had to + * swapped them in. */ - atomic_long_t nr_zswap_protected; + atomic_long_t nr_disk_swapins; }; unsigned long zswap_total_pages(void); -- cgit v1.2.3 From 17fe833b0de08a78bfb51388e0615969d73ea8ad Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 5 Aug 2024 14:52:03 +0200 Subject: mm: fix (harmless) type confusion in lock_vma_under_rcu() There is a (harmless) type confusion in lock_vma_under_rcu(): After vma_start_read(), we have taken the VMA lock but don't know yet whether the VMA has already been detached and scheduled for RCU freeing. At this point, ->vm_start and ->vm_end are accessed. vm_area_struct contains a union such that ->vm_rcu uses the same memory as ->vm_start and ->vm_end; so accessing ->vm_start and ->vm_end of a detached VMA is illegal and leads to type confusion between union members. Fix it by reordering the vma->detached check above the address checks, and document the rules for RCU readers accessing VMAs. This will probably change the number of observed VMA_LOCK_MISS events (since previously, trying to access a detached VMA whose ->vm_rcu has been scheduled would bail out when checking the fault address against the rcu_head members reinterpreted as VMA bounds). Link: https://lkml.kernel.org/r/20240805-fix-vma-lock-type-confusion-v1-1-9f25443a9a71@google.com Fixes: 50ee32537206 ("mm: introduce lock_vma_under_rcu to be used from arch-specific code") Signed-off-by: Jann Horn Acked-by: Suren Baghdasaryan Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 165c58b12ccc..003619fab20e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -660,6 +660,9 @@ struct vma_numab_state { * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). + * + * Only explicitly marked struct members may be accessed by RCU readers before + * getting a stable reference. */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ @@ -675,7 +678,11 @@ struct vm_area_struct { #endif }; - struct mm_struct *vm_mm; /* The address space we belong to. */ + /* + * The address space we belong to. + * Unstable RCU readers are allowed to read this. + */ + struct mm_struct *vm_mm; pgprot_t vm_page_prot; /* Access permissions of this VMA. */ /* @@ -688,7 +695,10 @@ struct vm_area_struct { }; #ifdef CONFIG_PER_VMA_LOCK - /* Flag to indicate areas detached from the mm->mm_mt tree */ + /* + * Flag to indicate areas detached from the mm->mm_mt tree. + * Unstable RCU readers are allowed to read this. + */ bool detached; /* @@ -706,6 +716,7 @@ struct vm_area_struct { * slowpath. 
*/ int vm_lock_seq; + /* Unstable RCU readers are allowed to read this. */ struct vma_lock *vm_lock; #endif -- cgit v1.2.3 From cc0a0f98553528791ae33ba5ee8c118b52ae2028 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 5 Aug 2024 14:39:39 +0200 Subject: kfence: introduce burst mode Introduce burst mode, which can be configured with kfence.burst=$count, where the burst count denotes the additional successive slab allocations to be allocated through KFENCE for each sample interval. The idea is that this can give developers an additional knob to make KFENCE more aggressive when debugging specific issues of systems where either rebooting or recompiling the kernel with KASAN is not possible. Experiment: To assess the effectiveness of the new option, we randomly picked a recent out-of-bounds [1] and use-after-free bug [2], each with a reproducer provided by syzbot, that initially detected these bugs with KASAN. We then tried to reproduce the bugs with KFENCE below. [1] Fixed by: 7c55b78818cf ("jfs: xattr: fix buffer overflow for invalid xattr") https://syzkaller.appspot.com/bug?id=9d1b59d4718239da6f6069d3891863c25f9f24a2 [2] Fixed by: f8ad00f3fb2a ("l2tp: fix possible UAF when cleaning up tunnels") https://syzkaller.appspot.com/bug?id=4f34adc84f4a3b080187c390eeef60611fd450e1 The following KFENCE configs were compared. A pool size of 1023 objects was used for all configurations. Baseline kfence.sample_interval=100 kfence.skip_covered_thresh=75 kfence.burst=0 Aggressive kfence.sample_interval=1 kfence.skip_covered_thresh=10 kfence.burst=0 AggressiveBurst kfence.sample_interval=1 kfence.skip_covered_thresh=10 kfence.burst=1000 Each reproducer was run 10 times (after a fresh reboot), with the following detection counts for each KFENCE config: | Detection Count out of 10 | | OOB [1] | UAF [2] | ------------------+-------------+-------------+ Default | 0/10 | 0/10 | Aggressive | 0/10 | 0/10 | AggressiveBurst | 8/10 | 8/10 | With the Default and even the Aggressive configs the results are unsurprising, given KFENCE has not been designed for deterministic bug detection of small test cases. However, when enabling burst mode with relatively large burst count, KFENCE can start to detect heap memory-safety bugs even in simpler test cases with high probability (in the above cases with ~80% probability). Link: https://lkml.kernel.org/r/20240805124203.2692278-1-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Jann Horn Signed-off-by: Andrew Morton --- include/linux/kfence.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 88100cc9caba..0ad1ddbb8b99 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -124,7 +124,7 @@ static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp if (!static_branch_likely(&kfence_allocation_key)) return NULL; #endif - if (likely(atomic_read(&kfence_allocation_gate))) + if (likely(atomic_read(&kfence_allocation_gate) > 0)) return NULL; return __kfence_alloc(s, size, flags); } -- cgit v1.2.3 From 47baed6a132f611228113851a70c73f6e6478d87 Mon Sep 17 00:00:00 2001 From: Jianhui Zhou <912460177@qq.com> Date: Wed, 7 Aug 2024 15:44:48 +0800 Subject: percpu: remove pcpu_alloc_size() pcpu_alloc_size() was added in 7ac5c53e0073 "mm/percpu.c: introduce pcpu_alloc_size()", which is used to get the allocated memory size in bpf. 
However, pcpu_alloc_size() is no longer used in "bpf: Use c->unit_size to select target cache during free" because its actual allocated memory size may change at runtime due to its slab merging mechanism. Therefore, pcpu_alloc_size() can be removed. Link: https://lkml.kernel.org/r/tencent_AD5C50E8D78C07A3CE539BD5F6BF39706507@qq.com Signed-off-by: Jianhui Zhou <912460177@qq.com> Cc: Christoph Lameter Cc: Dennis Zhou Cc: JonasZhou Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/percpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4b2047b78b67..b6321fc49159 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -135,7 +135,6 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved, gfp_t gfp) __alloc_size(1); -extern size_t pcpu_alloc_size(void __percpu *__pdata); #define __alloc_percpu_gfp(_size, _align, _gfp) \ alloc_hooks(pcpu_alloc_noprof(_size, _align, false, _gfp)) -- cgit v1.2.3 From 09022bc196d23484a7a5d48cf373f8583e3fcf23 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 7 Aug 2024 20:35:26 +0100 Subject: mm: remove PG_error The PG_error bit is now unused; delete it and free up a bit in page->flags. Link: https://lkml.kernel.org/r/20240807193528.1865100-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 +----- include/trace/events/mmflags.h | 1 - include/uapi/linux/kernel-page-flags.h | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5769fe6e4950..a0a29bd092f8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -66,8 +66,6 @@ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * - * PG_error is set to indicate that an I/O error occurred on this page. - * * PG_arch_1 is an architecture specific page state bit. The generic code * guarantees that this bit is cleared for a page when it first is entered into * the page cache. @@ -103,7 +101,6 @@ enum pageflags { PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, - PG_error, PG_owner_priv_1, /* Owner use.
If pagecache, fs may use*/ PG_arch_1, PG_reserved, @@ -183,7 +180,7 @@ enum pageflags { */ /* At least one page in this folio has the hwpoison flag set */ - PG_has_hwpoisoned = PG_error, + PG_has_hwpoisoned = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; @@ -506,7 +503,6 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } __PAGEFLAG(Locked, locked, PF_NO_TAIL) FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE) -PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE) __FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b63d211bd141..b5c4da370a50 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -100,7 +100,6 @@ #define __def_pageflag_names \ DEF_PAGEFLAG_NAME(locked), \ DEF_PAGEFLAG_NAME(waiters), \ - DEF_PAGEFLAG_NAME(error), \ DEF_PAGEFLAG_NAME(referenced), \ DEF_PAGEFLAG_NAME(uptodate), \ DEF_PAGEFLAG_NAME(dirty), \ diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h index 6f2f2720f3ac..ff8032227876 100644 --- a/include/uapi/linux/kernel-page-flags.h +++ b/include/uapi/linux/kernel-page-flags.h @@ -7,7 +7,7 @@ */ #define KPF_LOCKED 0 -#define KPF_ERROR 1 +#define KPF_ERROR 1 /* Now unused */ #define KPF_REFERENCED 2 #define KPF_UPTODATE 3 #define KPF_DIRTY 4 -- cgit v1.2.3 From 310183de7bb2ed114a80d057690ddb23d0cfe814 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 9 Aug 2024 14:48:50 +0300 Subject: mm: introduce PageUnaccepted() page type The new page type allows physical memory scanners to detect unaccepted memory and handle it accordingly. The page type is serialized with zone lock. Link: https://lkml.kernel.org/r/20240809114854.3745464-5-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: David Hildenbrand Cc: Borislav Petkov Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Mel Gorman Cc: Mike Rapoport (Microsoft) Cc: Tom Lendacky Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a0a29bd092f8..17175a328e6b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -939,6 +939,7 @@ enum pagetype { PG_hugetlb = 0x04000000, PG_slab = 0x02000000, PG_zsmalloc = 0x01000000, + PG_unaccepted = 0x00800000, PAGE_TYPE_BASE = 0x80000000, @@ -1072,6 +1073,13 @@ FOLIO_TEST_FLAG_FALSE(hugetlb) PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) +/* + * Mark pages that has to be accepted before touched for the first time. + * + * Serialized with zone lock. + */ +PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) + /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. -- cgit v1.2.3 From 5adfeaecc487e7023f1c7bbdc081707d7a93110f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 9 Aug 2024 14:48:51 +0300 Subject: mm: rework accept memory helpers Make accept_memory() and range_contains_unaccepted_memory() take 'start' and 'size' arguments instead of 'start' and 'end'. Remove accept_page(), replacing it with direct calls to accept_memory(). The accept_page() name is going to be used for a different function. Link: https://lkml.kernel.org/r/20240809114854.3745464-6-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. 
Shutemov Suggested-by: David Hildenbrand Cc: Borislav Petkov Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Mel Gorman Cc: Mike Rapoport (Microsoft) Cc: Tom Lendacky Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ee8cea73d415..b1eed30fdc06 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4062,18 +4062,18 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, #ifdef CONFIG_UNACCEPTED_MEMORY -bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end); -void accept_memory(phys_addr_t start, phys_addr_t end); +bool range_contains_unaccepted_memory(phys_addr_t start, unsigned long size); +void accept_memory(phys_addr_t start, unsigned long size); #else static inline bool range_contains_unaccepted_memory(phys_addr_t start, - phys_addr_t end) + unsigned long size) { return false; } -static inline void accept_memory(phys_addr_t start, phys_addr_t end) +static inline void accept_memory(phys_addr_t start, unsigned long size) { } @@ -4081,9 +4081,7 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end) static inline bool pfn_is_unaccepted_memory(unsigned long pfn) { - phys_addr_t paddr = pfn << PAGE_SHIFT; - - return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); + return range_contains_unaccepted_memory(pfn << PAGE_SHIFT, PAGE_SIZE); } void vma_pgtable_walk_begin(struct vm_area_struct *vma); -- cgit v1.2.3 From 1c399e74a97ca733d0780cc51819aa81fb292c22 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 12 Aug 2024 14:12:23 -0400 Subject: mm/x86: implement arch_check_zapped_pud() Introduce arch_check_zapped_pud() to sanity check shadow stack on PUD zaps. It has the same logic as the PMD helper. One thing to mention is, it might be a good idea to use page_table_check in the future for trapping wrong setups of shadow stack pgtable entries [1]. That is left for the future as a separate effort. [1] https://lore.kernel.org/all/59d518698f664e07c036a5098833d7b56b953305.camel@intel.com Link: https://lkml.kernel.org/r/20240812181225.1360970-6-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: David Hildenbrand Cc: "Edgecombe, Rick P" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Aneesh Kumar K.V Cc: Christophe Leroy Cc: Dan Williams Cc: Dave Jiang Cc: David Rientjes Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Paolo Bonzini Cc: Rik van Riel Cc: Sean Christopherson Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2a6a3cccfc36..780f3b439d98 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -447,6 +447,12 @@ static inline void arch_check_zapped_pmd(struct vm_area_struct *vma, } #endif +#ifndef arch_check_zapped_pud +static inline void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud) +{ +} +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, -- cgit v1.2.3 From cb0f01beb16669e91510fcdb2cea213931aee017 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 12 Aug 2024 14:12:25 -0400 Subject: mm/mprotect: fix dax pud handlings This is only relevant to the two archs that support PUD dax, aka, x86_64 and ppc64. 
PUD THPs do not yet exist elsewhere, and hugetlb PUDs do not count in this case. DAX have had PUD mappings for years, but change protection path never worked. When the path is triggered in any form (a simple test program would be: call mprotect() on a 1G dev_dax mapping), the kernel will report "bad pud". This patch should fix that. The new change_huge_pud() tries to keep everything simple. For example, it doesn't optimize write bit as that will need even more PUD helpers. It's not too bad anyway to have one more write fault in the worst case once for 1G range; may be a bigger thing for each PAGE_SIZE, though. Neither does it support userfault-wp bits, as there isn't such PUD mappings that is supported; file mappings always need a split there. The same to TLB shootdown: the pmd path (which was for x86 only) has the trick of using _ad() version of pmdp_invalidate*() which can avoid one redundant TLB, but let's also leave that for later. Again, the larger the mapping, the smaller of such effect. There's some difference on handling "retry" for change_huge_pud() (where it can return 0): it isn't like change_huge_pmd(), as the pmd version is safe with all conditions handled in change_pte_range() later, thanks to Hugh's new pte_offset_map_lock(). In short, change_pte_range() is simply smarter. For that, change_pud_range() will need proper retry if it races with something else when a huge PUD changed from under us. The last thing to mention is currently the PUD path ignores the huge pte numa counter (NUMA_HUGE_PTE_UPDATES), not only because DAX is not applicable to NUMA, but also that it's ambiguous on its own to decide how to account pud in this case. In one earlier version of this patchset I proposed to remove the counter as it doesn't even look right to do the accounting as of now [1], but then a further discussion suggests we can leave that for later, as that doesn't block this series if we choose to ignore that counter. That's what this patch does, by ignoring it. When at it, touch up the comment in pgtable_split_needed() to make it generic to either pmd or pud file THPs. [1] https://lore.kernel.org/all/20240715192142.3241557-3-peterx@redhat.com/ [2] https://lore.kernel.org/r/added2d0-b8be-4108-82ca-1367a388d0b1@redhat.com Link: https://lkml.kernel.org/r/20240812181225.1360970-8-peterx@redhat.com Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Fixes: 27af67f35631 ("powerpc/book3s64/mm: enable transparent pud hugepage") Signed-off-by: Peter Xu Cc: Dan Williams Cc: Matthew Wilcox Cc: Dave Jiang Cc: Hugh Dickins Cc: Kirill A. 
Shutemov Cc: Vlastimil Babka Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Michael Ellerman Cc: Aneesh Kumar K.V Cc: Oscar Salvador Cc: Christophe Leroy Cc: David Hildenbrand Cc: David Rientjes Cc: "Edgecombe, Rick P" Cc: Nicholas Piggin Cc: Paolo Bonzini Cc: Rik van Riel Cc: Sean Christopherson Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ce44caa40eed..6370026689e0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -342,6 +342,17 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +int change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, + pud_t *pudp, unsigned long addr, pgprot_t newprot, + unsigned long cp_flags); +#else +static inline int +change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, + pud_t *pudp, unsigned long addr, pgprot_t newprot, + unsigned long cp_flags) { return 0; } +#endif + #define split_huge_pud(__vma, __pud, __address) \ do { \ pud_t *____pud = (__pud); \ @@ -585,6 +596,19 @@ static inline int next_order(unsigned long *orders, int prev) { return 0; } + +static inline void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, + unsigned long address) +{ +} + +static inline int change_huge_pud(struct mmu_gather *tlb, + struct vm_area_struct *vma, pud_t *pudp, + unsigned long addr, pgprot_t newprot, + unsigned long cp_flags) +{ + return 0; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, -- cgit v1.2.3 From b6273b55d88539c6a7127a697c61d3f89c5831fe Mon Sep 17 00:00:00 2001 From: Takaya Saeki Date: Tue, 13 Aug 2024 10:03:12 +0000 Subject: filemap: add trace events for get_pages, map_pages, and fault To allow precise tracking of page caches accessed, add new tracepoints that trigger when a process actually accesses them. The ureadahead program used by ChromeOS traces the disk access of programs as they start up at boot up. It uses mincore(2) or the 'mm_filemap_add_to_page_cache' trace event to accomplish this. It stores this information in a "pack" file and on subsequent boots, it will read the pack file and call readahead(2) on the information so that disk storage can be loaded into RAM before the applications actually need it. A problem we see is that due to the kernel's readahead algorithm that can aggressively pull in more data than needed (to try and accomplish the same goal) and this data is also recorded. The end result is that the pack file contains a lot of pages on disk that are never actually used. Calling readahead(2) on these unused pages can slow down the system boot up times. To solve this, add 3 new trace events, get_pages, map_pages, and fault. These will be used to trace the pages are not only pulled in from disk, but are actually used by the application. Only those pages will be stored in the pack file, and this helps out the performance of boot up. With the combination of these 3 new trace events and mm_filemap_add_to_page_cache, we observed a reduction in the pack file by 7.3% - 20% on ChromeOS varying by device. 
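For reference, a hedged sketch of how the generated tracepoints are emitted on the kernel side (the actual call sites in the filemap code are outside the include/ hunk shown here; the helper below is purely illustrative):

	/* Sketch: the range events take the first and last page cache index accessed. */
	static void note_pagecache_access(struct address_space *mapping,
					  pgoff_t index, pgoff_t last_index)
	{
		trace_mm_filemap_get_pages(mapping, index, last_index);	/* read path */
		trace_mm_filemap_map_pages(mapping, index, last_index);	/* faultaround */
		trace_mm_filemap_fault(mapping, index);			/* single fault */
	}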
Link: https://lkml.kernel.org/r/20240813100312.3930505-1-takayas@chromium.org Signed-off-by: Takaya Saeki Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Cc: Junichi Uekawa Cc: Mathieu Desnoyers Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/trace/events/filemap.h | 84 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'include') diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h index 46c89c1e460c..f48fe637bfd2 100644 --- a/include/trace/events/filemap.h +++ b/include/trace/events/filemap.h @@ -56,6 +56,90 @@ DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, TP_ARGS(folio) ); +DECLARE_EVENT_CLASS(mm_filemap_op_page_cache_range, + + TP_PROTO( + struct address_space *mapping, + pgoff_t index, + pgoff_t last_index + ), + + TP_ARGS(mapping, index, last_index), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(unsigned long, index) + __field(unsigned long, last_index) + ), + + TP_fast_assign( + __entry->i_ino = mapping->host->i_ino; + if (mapping->host->i_sb) + __entry->s_dev = + mapping->host->i_sb->s_dev; + else + __entry->s_dev = mapping->host->i_rdev; + __entry->index = index; + __entry->last_index = last_index; + ), + + TP_printk( + "dev=%d:%d ino=%lx ofs=%lld-%lld", + MAJOR(__entry->s_dev), + MINOR(__entry->s_dev), __entry->i_ino, + ((loff_t)__entry->index) << PAGE_SHIFT, + ((((loff_t)__entry->last_index + 1) << PAGE_SHIFT) - 1) + ) +); + +DEFINE_EVENT(mm_filemap_op_page_cache_range, mm_filemap_get_pages, + TP_PROTO( + struct address_space *mapping, + pgoff_t index, + pgoff_t last_index + ), + TP_ARGS(mapping, index, last_index) +); + +DEFINE_EVENT(mm_filemap_op_page_cache_range, mm_filemap_map_pages, + TP_PROTO( + struct address_space *mapping, + pgoff_t index, + pgoff_t last_index + ), + TP_ARGS(mapping, index, last_index) +); + +TRACE_EVENT(mm_filemap_fault, + TP_PROTO(struct address_space *mapping, pgoff_t index), + + TP_ARGS(mapping, index), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(unsigned long, index) + ), + + TP_fast_assign( + __entry->i_ino = mapping->host->i_ino; + if (mapping->host->i_sb) + __entry->s_dev = + mapping->host->i_sb->s_dev; + else + __entry->s_dev = mapping->host->i_rdev; + __entry->index = index; + ), + + TP_printk( + "dev=%d:%d ino=%lx ofs=%lld", + MAJOR(__entry->s_dev), + MINOR(__entry->s_dev), __entry->i_ino, + ((loff_t)__entry->index) << PAGE_SHIFT + ) +); + TRACE_EVENT(filemap_set_wb_err, TP_PROTO(struct address_space *mapping, errseq_t eseq), -- cgit v1.2.3 From 223febc6e5573cda5e94e6b38fcdaded068db777 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Aug 2024 18:26:02 +1000 Subject: mm: add optional close() to struct vm_special_mapping Add an optional close() callback to struct vm_special_mapping. It will be used, by powerpc at least, to handle unmapping of the VDSO. Although support for unmapping the VDSO was initially added for CRIU[1], it is not desirable to guard that support behind CONFIG_CHECKPOINT_RESTORE. There are other known users of unmapping the VDSO which are not related to CRIU, eg. Valgrind [2] and void-ship [3]. The powerpc arch_unmap() hook has been in place for ~9 years, with no ifdef, so there may be other unknown users that have come to rely on unmapping the VDSO. Even if the code was behind an ifdef, major distros enable CHECKPOINT_RESTORE so users may not realise unmapping the VDSO depends on that configuration option. 
It's also undesirable to have such core mm behaviour behind a relatively obscure CONFIG option. Longer term the unmap behaviour should be standardised across architectures, however that is complicated by the fact that the VDSO pointer is stored differently across architectures. There was a previous attempt to unify that handling [4], which could be revived. See [5] for further discussion. [1]: commit 83d3f0e90c6c ("powerpc/mm: tracking vDSO remap") [2]: https://sourceware.org/git/?p=valgrind.git;a=commit;h=3a004915a2cbdcdebafc1612427576bf3321eef5 [3]: https://github.com/insanitybit/void-ship [4]: https://lore.kernel.org/lkml/20210611180242.711399-17-dima@arista.com/ [5]: https://lore.kernel.org/linuxppc-dev/shiq5v3jrmyi6ncwke7wgl76ojysgbhrchsk32q4lbx2hadqqc@kzyy2igem256 Link: https://lkml.kernel.org/r/20240812082605.743814-1-mpe@ellerman.id.au Signed-off-by: Michael Ellerman Suggested-by: Linus Torvalds Reviewed-by: David Hildenbrand Reviewed-by: Liam R. Howlett Cc: Christophe Leroy Cc: Jeff Xu Cc: Nicholas Piggin Cc: Pedro Falcato Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 003619fab20e..2419e60c9a7f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1324,6 +1324,9 @@ struct vm_special_mapping { int (*mremap)(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma); + + void (*close)(const struct vm_special_mapping *sm, + struct vm_area_struct *vma); }; enum tlb_flush_reason { -- cgit v1.2.3 From 40b88644dd92d99e572063893c2a247598c238d6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Aug 2024 18:26:04 +1000 Subject: mm: remove arch_unmap() Now that powerpc no longer uses arch_unmap() to handle VDSO unmapping, there are no meaningful implementations left. Drop support for it entirely, and update comments which refer to it. Link: https://lkml.kernel.org/r/20240812082605.743814-3-mpe@ellerman.id.au Signed-off-by: Michael Ellerman Suggested-by: Linus Torvalds Acked-by: David Hildenbrand Reviewed-by: Thomas Gleixner Reviewed-by: Liam R. Howlett Cc: Christophe Leroy Cc: Jeff Xu Cc: Nicholas Piggin Cc: Pedro Falcato Signed-off-by: Andrew Morton --- include/asm-generic/mm_hooks.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index 4dbb177d1150..6eea3b3c1e65 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Define generic no-op hooks for arch_dup_mmap, arch_exit_mmap - * and arch_unmap to be included in asm-FOO/mmu_context.h for any - * arch FOO which doesn't need to hook these. + * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap + * to be included in asm-FOO/mmu_context.h for any arch FOO which + * doesn't need to hook these.
*/ #ifndef _ASM_GENERIC_MM_HOOKS_H #define _ASM_GENERIC_MM_HOOKS_H @@ -17,11 +17,6 @@ static inline void arch_exit_mmap(struct mm_struct *mm) { } -static inline void arch_unmap(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { -- cgit v1.2.3 From 497258dfafcc6b88e7c4c46a38a479721d44cf41 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 20 Aug 2024 15:14:47 -0700 Subject: mm: remove legacy install_special_mapping() code All relevant architectures had already been converted to the new interface (which just has an underscore in front of the name - not very imaginative naming), this just force-converts the stragglers. The modern interface is almost identical to the old one, except instead of the page pointer it takes a "struct vm_special_mapping" that describes the mapping (and contains the page pointer as one member), and it returns the resulting 'vma' instead of just the error code. Getting rid of the old interface also gets rid of some special casing, which had caused problems with the mremap extensions to "struct vm_special_mapping". [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/CAHk-=whvR+z=0=0gzgdfUiK70JTa-=+9vxD-4T=3BagXR6dciA@mail.gmail.com Tested-by: Rob Landley # arch/sh/ Link: https://lore.kernel.org/all/20240819195120.GA1113263@thelio-3990X/ Signed-off-by: Linus Torvalds Cc: Nathan Chancellor Cc: Michael Ellerman Cc: Anton Ivanov Cc: Brian Cain Cc: Christophe Leroy Cc: Dinh Nguyen Cc: Guo Ren Cc: Jeff Xu Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Liam R. Howlett Cc: Nicholas Piggin Cc: Pedro Falcato Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Landley Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b1eed30fdc06..0fde51c0532f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3263,10 +3263,6 @@ extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long flags, const struct vm_special_mapping *spec); -/* This is an obsolete alternative to _install_special_mapping. */ -extern int install_special_mapping(struct mm_struct *mm, - unsigned long addr, unsigned long len, - unsigned long flags, struct page **pages); unsigned long randomize_stack_top(unsigned long stack_top); unsigned long randomize_page(unsigned long start, unsigned long range); -- cgit v1.2.3 From 02f4bbefcada38cab86b365e5c795e0f858356bc Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Wed, 14 Aug 2024 17:34:15 +0800 Subject: mm: kmem: add lockdep assertion to obj_cgroup_memcg obj_cgroup_memcg() is only safe to use - i.e. the returned memory cgroup is only protected from being freed - when the caller is holding the rcu read lock, objcg_lock or cgroup_mutex. It is very easy to ignore those conditions when users call some upper APIs which call obj_cgroup_memcg() internally like mem_cgroup_from_slab_obj() (See the link below). So it is better to add a lockdep assertion to obj_cgroup_memcg() to find those issues ASAP. Because there is no user of obj_cgroup_memcg() holding objcg_lock to make the returned memory cgroup safe, do not add objcg_lock assertion (We should export objcg_lock if we really want to do so). Additionally, this is some internal implementation detail of memcg and should not be accessible outside memcg code.
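To make the locking rule concrete, a minimal sketch of a caller that satisfies the new assertion (the function is hypothetical; obj_cgroup_memcg() and the RCU primitives are real):

	/* Sketch: only dereference the objcg's memcg inside an RCU read section. */
	static void peek_objcg_memcg(struct obj_cgroup *objcg)
	{
		struct mem_cgroup *memcg;

		rcu_read_lock();	/* keeps the returned memcg from being freed */
		memcg = obj_cgroup_memcg(objcg);
		/* ... use memcg here; do not stash the pointer past the unlock ... */
		rcu_read_unlock();
	}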
Some users like __mem_cgroup_uncharge() do not care the lifetime of the returned memory cgroup, which just want to know if the folio is charged to a memory cgroup, therefore, they do not need to hold the needed locks. In which case, introduce a new helper folio_memcg_charged() to do this. Compare it to folio_memcg(), it could eliminate a memory access of objcg->memcg for kmem, actually, a really small gain. [songmuchun@bytedance.com: fix split_page_memcg()] Link: https://lkml.kernel.org/r/20240819080415.44964-1-songmuchun@bytedance.com Link: https://lore.kernel.org/all/20240718083607.42068-1-songmuchun@bytedance.com/ Link: https://lkml.kernel.org/r/20240814093415.17634-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Vlastimil Babka Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1b79760af685..07eadf7ecbba 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -366,11 +366,11 @@ static inline bool folio_memcg_kmem(struct folio *folio); * After the initialization objcg->memcg is always pointing at * a valid memcg, but can be atomically swapped to the parent memcg. * - * The caller must ensure that the returned memcg won't be released: - * e.g. acquire the rcu_read_lock or css_set_lock. + * The caller must ensure that the returned memcg won't be released. */ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) { + lockdep_assert_once(rcu_read_lock_held() || lockdep_is_held(&cgroup_mutex)); return READ_ONCE(objcg->memcg); } @@ -444,6 +444,19 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) return __folio_memcg(folio); } +/* + * folio_memcg_charged - If a folio is charged to a memory cgroup. + * @folio: Pointer to the folio. + * + * Returns true if folio is charged to a memory cgroup, otherwise returns false. + */ +static inline bool folio_memcg_charged(struct folio *folio) +{ + if (folio_memcg_kmem(folio)) + return __folio_objcg(folio) != NULL; + return __folio_memcg(folio) != NULL; +} + /** * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. * @folio: Pointer to the folio. @@ -460,7 +473,6 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) unsigned long memcg_data = READ_ONCE(folio->memcg_data); VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); - WARN_ON_ONCE(!rcu_read_lock_held()); if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; @@ -469,6 +481,8 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) return obj_cgroup_memcg(objcg); } + WARN_ON_ONCE(!rcu_read_lock_held()); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } -- cgit v1.2.3 From bd164d81a767f33c30d5c5b50b775631699ee976 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 14 Aug 2024 12:19:28 -0400 Subject: maple_tree: introduce store_type enum Patch series "Introduce a store type enum for the Maple tree", v4. ================================ OVERVIEW ================================ This series implements two work items[3]: "aligning mas_store_gfp() with mas_preallocate()" and "enum for store type". mas_store_gfp() is modified to preallocate nodes. 
This simplies many of the write helper functions by allowing them to use mas_store_gfp() rather than open coding node allocation and error handling. The enum defines the following store types: enum store_type { wr_invalid, wr_new_root, wr_store_root, wr_exact_fit, wr_spanning_store, wr_split_store, wr_rebalance, wr_append, wr_node_store, wr_slot_store, }; In the current maple tree code, a walk down the tree is done in mas_preallocate() to determine the number of nodes needed for this write. After node allocation, mas_wr_store_entry() will perform another walk to determine which write helper function to use to complete the write. Rather than performing the second walk, we can store the type of write in the maple write state during node allocation and read this field to complete the write. Patches 1-16 implement this store type feature. Patch 17 is a cleanup patch to change functions that have unused return types to be void. ================================ RESULTS ================================= Phoronix t-test-1 (Seconds < Lower Is Better) v6.10-rc6 Threads: 1 33.15 Threads: 2 10.81 v6.10-rc6 + this series Threads: 1 32.69 Threads: 2 10.45 Stress-ng mmap 6.10_base store_type_v4 Duration User 2744.65 2769.40 Duration System 10862.69 10817.59 Duration Elapsed 1477.58 1478.35 ================================ TESTING ================================= Testing was done with the maple tree test suite. A new test case is also added to validate the order in which we test for and assign the store type. [1]: https://lore.kernel.org/linux-mm/80926b22-a8d2-9992-eb5e-27e2c99cf460@google.com/T/#m81044feb66765265f8ca7f21e4b4b3725b18780a [2]: https://lore.kernel.org/linux-mm/80926b22-a8d2-9992-eb5e-27e2c99cf460@google.com/T/#mb36c6526486638e82518c0f37a428fb279c84d8a [3]: https://lists.infradead.org/pipermail/maple-tree/2023-December/003098.html This patch (of 17): Add a store_type enum that is stored in ma_state. This will be used to keep track of partial walks of the tree so that subsequent walks can pick up where a previous walk left off. Link: https://lkml.kernel.org/r/20240814161944.55347-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20240814161944.55347-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: Liam R. 
Howlett Cc: Matthew Wilcox (Oracle) Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index a53ad4dabd7e..8e1504a81cd2 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -148,6 +148,18 @@ enum maple_type { maple_arange_64, }; +enum store_type { + wr_invalid, + wr_new_root, + wr_store_root, + wr_exact_fit, + wr_spanning_store, + wr_split_store, + wr_rebalance, + wr_append, + wr_node_store, + wr_slot_store, +}; /** * DOC: Maple tree flags @@ -436,6 +448,7 @@ struct ma_state { unsigned char offset; unsigned char mas_flags; unsigned char end; /* The end of the node */ + enum store_type store_type; /* The type of store needed for this operation */ }; struct ma_wr_state { @@ -477,6 +490,7 @@ struct ma_wr_state { .max = ULONG_MAX, \ .alloc = NULL, \ .mas_flags = 0, \ + .store_type = wr_invalid, \ } #define MA_WR_STATE(name, ma_state, wr_entry) \ -- cgit v1.2.3 From 5d383b69a04e2eb2e0ae06e91821fd82cb9acf73 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 14 Aug 2024 22:04:47 -0700 Subject: memcg: move v1 only percpu stats in separate struct Patch series "memcg: further decouple v1 code from v2". Some of the v1 code is still in v2 code base due to v1 fields in the struct memcg_vmstats_percpu. This field decouples those fileds from v2 struct and move all the related code into v1 only code base. This patch (of 7): At the moment struct memcg_vmstats_percpu contains two v1 only fields which consumes memory even when CONFIG_MEMCG_V1 is not enabled. In addition there are v1 only functions accessing them and are in the main memcontrol source file and can not be moved to v1 only source file due to these fields. Let's move these fields into their own struct. Later patches will move the functions accessing them to v1 source file and only allocate these fields when CONFIG_MEMCG_V1 is enabled. Link: https://lkml.kernel.org/r/20240815050453.1298138-1-shakeel.butt@linux.dev Link: https://lkml.kernel.org/r/20240815050453.1298138-2-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: T.J. Mercier Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 07eadf7ecbba..08b7121cf43a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -70,6 +70,7 @@ struct mem_cgroup_id { }; struct memcg_vmstats_percpu; +struct memcg1_events_percpu; struct memcg_vmstats; struct lruvec_stats_percpu; struct lruvec_stats; @@ -254,6 +255,7 @@ struct mem_cgroup { struct list_head objcg_list; struct memcg_vmstats_percpu __percpu *vmstats_percpu; + struct memcg1_events_percpu __percpu *events_percpu; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; -- cgit v1.2.3 From 0ccaf421d6592bb99bb9b424e4ccca3c6367d799 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 14 Aug 2024 22:04:52 -0700 Subject: memcg: allocate v1 event percpu only on v1 deployment Currently memcg->events_percpu gets allocated on v2 deployments. Let's move the allocation to v1 only codebase. This is not needed in v2. Link: https://lkml.kernel.org/r/20240815050453.1298138-7-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: T.J. 
Mercier Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 08b7121cf43a..ed170399179a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -255,7 +255,6 @@ struct mem_cgroup { struct list_head objcg_list; struct memcg_vmstats_percpu __percpu *vmstats_percpu; - struct memcg1_events_percpu __percpu *events_percpu; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; @@ -277,6 +276,8 @@ struct mem_cgroup { struct page_counter kmem; /* v1 only */ struct page_counter tcpmem; /* v1 only */ + struct memcg1_events_percpu __percpu *events_percpu; + unsigned long soft_limit; /* protected by memcg_oom_lock */ -- cgit v1.2.3 From 836d13a6ef8a2eb0eab2bd2de06f2deabc62b060 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:18 +0300 Subject: xz: switch from public domain to BSD Zero Clause License (0BSD) Remove the public domain notices and add SPDX license identifiers. Change MODULE_LICENSE from "GPL" to "Dual BSD/GPL" because 0BSD should count as a BSD license variant here. The switch to 0BSD was done in the upstream XZ Embedded project because public domain has (real or perceived) legal issues in some jurisdictions. Link: https://lkml.kernel.org/r/20240721133633.47721-4-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/decompress/unxz.h | 5 ++--- include/linux/xz.h | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h index f764e2a7201e..3dd2658a9dab 100644 --- a/include/linux/decompress/unxz.h +++ b/include/linux/decompress/unxz.h @@ -1,10 +1,9 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd * * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef DECOMPRESS_UNXZ_H diff --git a/include/linux/xz.h b/include/linux/xz.h index 7285ca5d56e9..5728d57aecc0 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -1,11 +1,10 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * XZ decompressor * * Authors: Lasse Collin * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_H -- cgit v1.2.3 From ad8c67b870d108aa1286f4fc76d0c29a736fd75e Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:20 +0300 Subject: xz: fix kernel-doc formatting errors in xz.h The opaque structs xz_dec and xz_dec_microlzma are declared in xz.h but their definitions are in xz_dec_lzma2.c without kernel-doc comments. Use regular comments for these structs in xz.h to avoid errors when building the docs. Add a few missing colons. 
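(Background for the two fixes: a comment opening with "/**" is parsed by scripts/kernel-doc, which then expects a definition it can attach member documentation to, so declaration-only types such as struct xz_dec get a plain "/*" comment instead; and parameter lines are only picked up in the "@name: description" form, for example:)

         * @mode:      XZ_SINGLE or XZ_PREALLOC
         * @dict_size: LZMA dictionary size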
Link: https://lkml.kernel.org/r/20240721133633.47721-6-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/xz.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/xz.h b/include/linux/xz.h index 5728d57aecc0..af1e075d9add 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -142,7 +142,7 @@ struct xz_buf { size_t out_size; }; -/** +/* * struct xz_dec - Opaque type to hold the XZ decoder state */ struct xz_dec; @@ -240,15 +240,16 @@ XZ_EXTERN void xz_dec_end(struct xz_dec *s); * marked with XZ_EXTERN. This avoids warnings about static functions that * are never defined. */ -/** + +/* * struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state */ struct xz_dec_microlzma; /** * xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder - * @mode XZ_SINGLE or XZ_PREALLOC - * @dict_size LZMA dictionary size. This must be at least 4 KiB and + * @mode: XZ_SINGLE or XZ_PREALLOC + * @dict_size: LZMA dictionary size. This must be at least 4 KiB and * at most 3 GiB. * * In contrast to xz_dec_init(), this function only allocates the memory @@ -276,15 +277,15 @@ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, /** * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state - * @s Decoder state allocated using xz_dec_microlzma_alloc() - * @comp_size Compressed size of the input stream - * @uncomp_size Uncompressed size of the input stream. A value smaller + * @s: Decoder state allocated using xz_dec_microlzma_alloc() + * @comp_size: Compressed size of the input stream + * @uncomp_size: Uncompressed size of the input stream. A value smaller * than the real uncompressed size of the input stream can * be specified if uncomp_size_is_exact is set to false. * uncomp_size can never be set to a value larger than the * expected real uncompressed size because it would eventually * result in XZ_DATA_ERROR. - * @uncomp_size_is_exact This is an int instead of bool to avoid + * @uncomp_size_is_exact: This is an int instead of bool to avoid * requiring stdbool.h. This should normally be set to true. * When this is set to false, error detection is weaker. */ @@ -294,7 +295,7 @@ extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, /** * xz_dec_microlzma_run() - Run the MicroLZMA decoder - * @s Decoder state initialized using xz_dec_microlzma_reset() + * @s: Decoder state initialized using xz_dec_microlzma_reset() * @b: Input and output buffers * * This works similarly to xz_dec_run() with a few important differences. -- cgit v1.2.3 From 0f2c5996340b69d167d3f6ca38d3012204787ac1 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:21 +0300 Subject: xz: improve the MicroLZMA kernel-doc in xz.h Move the description of the format into a "DOC:" comment. Emphasize that MicroLZMA functions aren't usually needed. 
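(As a side note on the example retained in the moved text: the raw LZMA properties byte is (pb * 5 + lp) * 9 + lc, so lc/lp/pb = 3/0/2 encodes to (2 * 5 + 0) * 9 + 3 = 93 = 0x5D, whose bitwise negation is 0xA2, hence the value quoted in the comment.)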
Link: https://lkml.kernel.org/r/20240721133633.47721-7-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/xz.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/xz.h b/include/linux/xz.h index af1e075d9add..701d62c02b9a 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -232,9 +232,18 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s); */ XZ_EXTERN void xz_dec_end(struct xz_dec *s); -/* - * Decompressor for MicroLZMA, an LZMA variant with a very minimal header. - * See xz_dec_microlzma_alloc() below for details. +/** + * DOC: MicroLZMA decompressor + * + * This MicroLZMA header format was created for use in EROFS but may be used + * by others too. **In most cases one needs the XZ APIs above instead.** + * + * The compressed format supported by this decoder is a raw LZMA stream + * whose first byte (always 0x00) has been replaced with bitwise-negation + * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is + * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. + * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream + * marker must not be used. The unused values are reserved for future use. * * These functions aren't used or available in preboot code and thus aren't * marked with XZ_EXTERN. This avoids warnings about static functions that @@ -262,15 +271,6 @@ struct xz_dec_microlzma; * On success, xz_dec_microlzma_alloc() returns a pointer to * struct xz_dec_microlzma. If memory allocation fails or * dict_size is invalid, NULL is returned. - * - * The compressed format supported by this decoder is a raw LZMA stream - * whose first byte (always 0x00) has been replaced with bitwise-negation - * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is - * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. - * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream - * marker must not be used. The unused values are reserved for future use. - * This MicroLZMA header format was created for use in EROFS but may be used - * by others too. */ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, uint32_t dict_size); -- cgit v1.2.3 From c6f371bab25edccd39caa5dd452b50d9dfdf4ff0 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Wed, 24 Jul 2024 14:05:41 +0300 Subject: xz: remove XZ_EXTERN and extern from functions XZ_EXTERN was used to make internal functions static in the preboot code. However, in other decompressors this hasn't been done. On x86-64, this makes no difference to the kernel image size. Omit XZ_EXTERN and let some of the internal functions be extern in the preboot code. Omitting XZ_EXTERN from include/linux/xz.h fixes warnings in "make htmldocs" and makes the intradocument links to xz_dec functions work in Documentation/staging/xz.rst. The alternative would have been to add "XZ_EXTERN" to c_id_attributes in Documentation/conf.py but omitting XZ_EXTERN seemed cleaner. 
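(For readers following the now-working cross-references, a minimal single-call usage sketch of this API; the buffer variables in_buf/in_len/out_buf/out_len are placeholders:)

        struct xz_buf b = {
                .in = in_buf,   .in_pos = 0,  .in_size = in_len,
                .out = out_buf, .out_pos = 0, .out_size = out_len,
        };
        struct xz_dec *s = xz_dec_init(XZ_SINGLE, 0); /* dict_max unused in single-call mode */
        enum xz_ret ret;

        if (!s)
                return -ENOMEM;
        ret = xz_dec_run(s, &b);        /* XZ_STREAM_END on success in single-call mode */
        xz_dec_end(s);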
Link: https://lore.kernel.org/lkml/20240723205437.3c0664b0@kaneli/ Link: https://lkml.kernel.org/r/20240724110544.16430-1-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Tested-by: Michael Ellerman (powerpc) Cc: Jonathan Corbet Cc: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/xz.h | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/xz.h b/include/linux/xz.h index 701d62c02b9a..58ae1d746c6f 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -18,11 +18,6 @@ # include #endif -/* In Linux, this is used to make extern functions static when needed. */ -#ifndef XZ_EXTERN -# define XZ_EXTERN extern -#endif - /** * enum xz_mode - Operation mode * @@ -190,7 +185,7 @@ struct xz_dec; * ready to be used with xz_dec_run(). If memory allocation fails, * xz_dec_init() returns NULL. */ -XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); +struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); /** * xz_dec_run() - Run the XZ decoder @@ -210,7 +205,7 @@ XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); * get that amount valid data from the beginning of the stream. You must use * the multi-call decoder if you don't want to uncompress the whole stream. */ -XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); +enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); /** * xz_dec_reset() - Reset an already allocated decoder state @@ -223,14 +218,14 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in * multi-call mode. */ -XZ_EXTERN void xz_dec_reset(struct xz_dec *s); +void xz_dec_reset(struct xz_dec *s); /** * xz_dec_end() - Free the memory allocated for the decoder state * @s: Decoder state allocated using xz_dec_init(). If s is NULL, * this function does nothing. */ -XZ_EXTERN void xz_dec_end(struct xz_dec *s); +void xz_dec_end(struct xz_dec *s); /** * DOC: MicroLZMA decompressor @@ -244,10 +239,6 @@ XZ_EXTERN void xz_dec_end(struct xz_dec *s); * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream * marker must not be used. The unused values are reserved for future use. - * - * These functions aren't used or available in preboot code and thus aren't - * marked with XZ_EXTERN. This avoids warnings about static functions that - * are never defined. */ /* @@ -272,8 +263,8 @@ struct xz_dec_microlzma; * struct xz_dec_microlzma. If memory allocation fails or * dict_size is invalid, NULL is returned. */ -extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, - uint32_t dict_size); +struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size); /** * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state @@ -289,9 +280,8 @@ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, * requiring stdbool.h. This should normally be set to true. * When this is set to false, error detection is weaker. 
*/ -extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, - uint32_t comp_size, uint32_t uncomp_size, - int uncomp_size_is_exact); +void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size, + uint32_t uncomp_size, int uncomp_size_is_exact); /** * xz_dec_microlzma_run() - Run the MicroLZMA decoder @@ -329,15 +319,14 @@ extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, * may be changed normally like with XZ_PREALLOC. This way input data can be * provided from non-contiguous memory. */ -extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, - struct xz_buf *b); +enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, struct xz_buf *b); /** * xz_dec_microlzma_end() - Free the memory allocated for the decoder state * @s: Decoder state allocated using xz_dec_microlzma_alloc(). * If s is NULL, this function does nothing. */ -extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); +void xz_dec_microlzma_end(struct xz_dec_microlzma *s); /* * Standalone build (userspace build or in-kernel build for boot time use) @@ -358,13 +347,13 @@ extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); * This must be called before any other xz_* function to initialize * the CRC32 lookup table. */ -XZ_EXTERN void xz_crc32_init(void); +void xz_crc32_init(void); /* * Update CRC32 value using the polynomial from IEEE-802.3. To start a new * calculation, the third argument must be zero. To continue the calculation, * the previously returned value is passed as the third argument. */ -XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc); +uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc); #endif #endif -- cgit v1.2.3 From 9a42bfd255b288dad2d1a9df0a1fe58394d5da12 Mon Sep 17 00:00:00 2001 From: Deshan Zhang Date: Thu, 25 Jul 2024 17:30:45 +0800 Subject: lib/lru_cache: fix spelling mistake "colision"->"collision" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a spelling mistake in a literal string and in cariable names. Fix these. Link: https://lkml.kernel.org/r/20240725093044.1742842-1-deshan@nfschina.com Signed-off-by: Deshan Zhang Cc: Christoph Böhmwalder Cc: Lars Ellenberg Cc: Philipp Reisner Signed-off-by: Andrew Morton --- include/linux/lru_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index c9afcdd9324c..ff82ef85a084 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -119,7 +119,7 @@ write intent log information, three of which are mentioned here. */ /* this defines an element in a tracked set - * .colision is for hash table lookup. + * .collision is for hash table lookup. * When we process a new IO request, we know its sector, thus can deduce the * region number (label) easily. To do the label -> object lookup without a * full list walk, we use a simple hash table. @@ -145,7 +145,7 @@ write intent log information, three of which are mentioned here. * But it avoids high order page allocations in kmalloc. 
*/ struct lc_element { - struct hlist_node colision; + struct hlist_node collision; struct list_head list; /* LRU list or free list */ unsigned refcnt; /* back "pointer" into lc_cache->element[index], -- cgit v1.2.3 From 6ce2082fd3a25d5a8c756120959237cace0379f1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 15:12:35 +0300 Subject: fault-inject: improve build for CONFIG_FAULT_INJECTION=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fault-inject.h users across the kernel need to add a lot of #ifdef CONFIG_FAULT_INJECTION to cater for shortcomings in the header. Make fault-inject.h self-contained for CONFIG_FAULT_INJECTION=n, and add stubs for DECLARE_FAULT_ATTR(), setup_fault_attr(), should_fail_ex(), and should_fail() to allow removal of conditional compilation. [akpm@linux-foundation.org: repair fallout from no longer including debugfs.h into fault-inject.h] [akpm@linux-foundation.org: fix drivers/misc/xilinx_tmr_inject.c] [akpm@linux-foundation.org: Add debugfs.h inclusion to more files, per Stephen] Link: https://lkml.kernel.org/r/20240813121237.2382534-1-jani.nikula@intel.com Fixes: 6ff1cb355e62 ("[PATCH] fault-injection capabilities infrastructure") Signed-off-by: Jani Nikula Cc: Akinobu Mita Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Rob Clark Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/fault-inject.h | 36 +++++++++++++++++++++++++++++------- include/linux/mmc/host.h | 1 + include/ufs/ufshcd.h | 1 + 3 files changed, 31 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 354413950d34..8c829d28dcf3 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -2,13 +2,17 @@ #ifndef _LINUX_FAULT_INJECT_H #define _LINUX_FAULT_INJECT_H +#include +#include + +struct dentry; +struct kmem_cache; + #ifdef CONFIG_FAULT_INJECTION -#include -#include +#include #include #include -#include /* * For explanation of the elements of this struct, see @@ -51,6 +55,28 @@ int setup_fault_attr(struct fault_attr *attr, char *str); bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); +#else /* CONFIG_FAULT_INJECTION */ + +struct fault_attr { +}; + +#define DECLARE_FAULT_ATTR(name) struct fault_attr name = {} + +static inline int setup_fault_attr(struct fault_attr *attr, char *str) +{ + return 0; /* Note: 0 means error for __setup() handlers! 
*/ +} +static inline bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) +{ + return false; +} +static inline bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return false; +} + +#endif /* CONFIG_FAULT_INJECTION */ + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS struct dentry *fault_create_debugfs_attr(const char *name, @@ -87,10 +113,6 @@ static inline void fault_config_init(struct fault_config *config, #endif /* CONFIG_FAULT_INJECTION_CONFIGFS */ -#endif /* CONFIG_FAULT_INJECTION */ - -struct kmem_cache; - #ifdef CONFIG_FAIL_PAGE_ALLOC bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order); #else diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 88c6a76042ee..49470188fca7 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 0fd2aebac728..3f68ae3e4330 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From d994c238347d7ba4de15da00985e1bea75e91dc7 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 17 Aug 2024 14:37:54 +0200 Subject: ratelimit: convert flags to int to save 8 bytes in size Only bit 1 is used, making an unsigned long a total overkill. This brings it from 40 to 32 bytes, which in turn shrinks user_struct from 136 to 128 bytes. Since the latter is allocated with hwalign, this means the total usage goes down from 192 to 128 bytes per object. No functional changes. Link: https://lkml.kernel.org/r/20240817123754.240924-1-mjguzik@gmail.com Signed-off-by: Mateusz Guzik Signed-off-by: Andrew Morton --- include/linux/ratelimit_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index 002266693e50..765232ce0b5e 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -19,8 +19,8 @@ struct ratelimit_state { int burst; int printed; int missed; + unsigned int flags; unsigned long begin; - unsigned long flags; }; #define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ -- cgit v1.2.3 From 32cebfe1cc21a84e4a907575bb9fd1c3f6b091fd Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 27 Aug 2024 10:45:15 +0800 Subject: lib/string_choices: add str_true_false()/str_false_true() helper Add str_true_false()/str_false_true() helper to retur a "true" or "false" string literal. We found more than 10 cases currently exist in the tree. So these helpers can be used for these cases. This patch (of 3): Add str_true_false()/str_false_true() helper to return "true" or "false" string literal. Link: https://lkml.kernel.org/r/20240827024517.914100-1-lihongbo22@huawei.com Link: https://lkml.kernel.org/r/20240827024517.914100-2-lihongbo22@huawei.com Signed-off-by: Hongbo Li Cc: Andy Shevchenko Cc: Anna Schumaker Cc: Greg Kroah-Hartman Cc: Kees Cook Cc: Matthew Wilcox Cc: Trond Myklebust Signed-off-by: Andrew Morton --- include/linux/string_choices.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index d9ebe20229f8..4a2432313b8e 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -42,6 +42,12 @@ static inline const char *str_yes_no(bool v) return v ? 
"yes" : "no"; } +static inline const char *str_true_false(bool v) +{ + return v ? "true" : "false"; +} +#define str_false_true(v) str_true_false(!(v)) + /** * str_plural - Return the simple pluralization based on English counts * @num: Number used for deciding pluralization -- cgit v1.2.3 From 181028a0d84cdcc7ac86d05cc49eaa416ce85c8b Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Thu, 29 Aug 2024 08:34:05 -0700 Subject: RDMA/bnxt_re: Share a page to expose per SRQ info with userspace Gen P7 adapters needs to share a toggle bits information received in kernel driver with the user space. User space needs this info to arm the SRQ. User space application can get this page using the UAPI routines. Library will mmap this page and get the toggle bits to be used in the next ARM Doorbell. Uses a hash list to map the SRQ structure from the SRQ ID. SRQ structure is retrieved from the hash list while the library calls the UAPI routine to get the toggle page mapping. Currently the full page is mapped per SRQ. This can be optimized to enable multiple SRQs from the same application share the same page and different offsets in the page Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1724945645-14989-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/bnxt_re-abi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index 6821002931c8..faa9d62b3b30 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -141,8 +141,14 @@ struct bnxt_re_srq_req { __aligned_u64 srq_handle; }; +enum bnxt_re_srq_mask { + BNXT_RE_SRQ_TOGGLE_PAGE_SUPPORT = 0x1, +}; + struct bnxt_re_srq_resp { __u32 srqid; + __u32 rsvd; /* padding */ + __aligned_u64 comp_mask; }; enum bnxt_re_shpg_offt { -- cgit v1.2.3 From c4a315eaf8eff0d3234600e13db7e7c71c0b3405 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Aug 2024 14:14:56 +0200 Subject: gpio: ath79: remove support for platform data There are no more board files defining platform data for this driver so remove the header and support from the driver. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240821121456.19553-4-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/gpio-ath79.h | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 include/linux/platform_data/gpio-ath79.h (limited to 'include') diff --git a/include/linux/platform_data/gpio-ath79.h b/include/linux/platform_data/gpio-ath79.h deleted file mode 100644 index 3ea6dd942c27..000000000000 --- a/include/linux/platform_data/gpio-ath79.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Atheros AR7XXX/AR9XXX GPIO controller platform data - * - * Copyright (C) 2015 Alban Bedel - */ - -#ifndef __LINUX_PLATFORM_DATA_GPIO_ATH79_H -#define __LINUX_PLATFORM_DATA_GPIO_ATH79_H - -struct ath79_gpio_platform_data { - unsigned ngpios; - bool oe_inverted; -}; - -#endif -- cgit v1.2.3 From afec1aba08607b18cfd00fdcd6525aeca0e187bf Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 29 Jul 2024 21:26:43 +0100 Subject: dt-bindings: clock: renesas: Document RZ/V2H(P) SoC CPG Document the device tree bindings for the Renesas RZ/V2H(P) SoC Clock Pulse Generator (CPG). 
CPG block handles the below operations: - Generation and control of clock signals for the IP modules - Generation and control of resets - Control over booting - Low power consumption and power supply domains Also define constants for the core clocks of the RZ/V2H(P) SoC. Note the core clocks are a subset of the ones which are listed as part of section 4.4.2 of HW manual Rev.1.01 which cannot be controlled by CLKON register. Signed-off-by: Lad Prabhakar Reviewed-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20240729202645.263525-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g057-cpg.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/dt-bindings/clock/renesas,r9a09g057-cpg.h (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g057-cpg.h b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h new file mode 100644 index 000000000000..541e6d719bd6 --- /dev/null +++ b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2024 Renesas Electronics Corp. + */ +#ifndef __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ +#define __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ + +#include + +/* Core Clock list */ +#define R9A09G057_SYS_0_PCLK 0 +#define R9A09G057_CA55_0_CORE_CLK0 1 +#define R9A09G057_CA55_0_CORE_CLK1 2 +#define R9A09G057_CA55_0_CORE_CLK2 3 +#define R9A09G057_CA55_0_CORE_CLK3 4 +#define R9A09G057_CA55_0_PERIPHCLK 5 +#define R9A09G057_CM33_CLK0 6 +#define R9A09G057_CST_0_SWCLKTCK 7 +#define R9A09G057_IOTOP_0_SHCLK 8 + +#endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ */ -- cgit v1.2.3 From 4b2b0a2ce8153d65d0829e45e73bf6acdc291344 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 28 Aug 2024 17:23:57 +0300 Subject: gpiolib: legacy: Kill GPIOF_INIT_* definitions Besides the fact that (old) drivers use wrong definitions, e.g., GPIOF_INIT_HIGH instead of GPIOF_OUT_INIT_HIGH, shrink the legacy definitions by killing those GPIOF_INIT_* completely. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Acked-by: Alexander Sverdlin Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240828142554.2424189-2-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 063f71b18a7c..4af8ad114557 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -20,12 +20,9 @@ struct device; #define GPIOF_DIR_OUT (0 << 0) #define GPIOF_DIR_IN (1 << 0) -#define GPIOF_INIT_LOW (0 << 1) -#define GPIOF_INIT_HIGH (1 << 1) - #define GPIOF_IN (GPIOF_DIR_IN) -#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) -#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) +#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | (0 << 1)) +#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | (1 << 1)) /* Gpio pin is active-low */ #define GPIOF_ACTIVE_LOW (1 << 2) -- cgit v1.2.3 From 8c045ca534d03bab1ce4b4de49d29a36276a1e35 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 28 Aug 2024 17:23:58 +0300 Subject: gpiolib: legacy: Kill GPIOF_DIR_* definitions Besides the fact that (old) drivers use wrong definitions, e.g., GPIOF_DIR_IN instead of GPIOF_IN, shrink the legacy definitions by killing those GPIOF_DIR_* completely. 
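To illustrate what remains for legacy users after the cleanup, a hedged consumer sketch (GPIO number and label are made up; new code should use the descriptor-based gpiod_* API instead):

        #include <linux/gpio.h>

        static int example_init(void)
        {
                int err;

                /* output, driven high at request time; no GPIOF_DIR_* needed */
                err = gpio_request_one(42, GPIOF_OUT_INIT_HIGH, "example-reset");
                if (err)
                        return err;

                gpio_set_value(42, 0);  /* later: drive it low */
                gpio_free(42);
                return 0;
        }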
Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Acked-by: Alexander Sverdlin Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240828142554.2424189-3-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 4af8ad114557..2d105be7bbc3 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -17,12 +17,9 @@ struct device; /* make these flag values available regardless of GPIO kconfig options */ -#define GPIOF_DIR_OUT (0 << 0) -#define GPIOF_DIR_IN (1 << 0) - -#define GPIOF_IN (GPIOF_DIR_IN) -#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | (0 << 1)) -#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | (1 << 1)) +#define GPIOF_IN ((1 << 0)) +#define GPIOF_OUT_INIT_LOW ((0 << 0) | (0 << 1)) +#define GPIOF_OUT_INIT_HIGH ((0 << 0) | (1 << 1)) /* Gpio pin is active-low */ #define GPIOF_ACTIVE_LOW (1 << 2) -- cgit v1.2.3 From 60949b7b805424f21326b450ca4f1806c06d982e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Mon, 26 Aug 2024 12:16:44 +0200 Subject: ACPI: CPPC: Fix MASK_VAL() usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MASK_VAL() was added as a way to handle bit_offset and bit_width for registers located in system memory address space. However, while suited for reading, it does not work for writing and result in corrupted registers when writing values with bit_offset > 0. Moreover, when a register is collocated with another one at the same address but with a different mask, the current code results in the other registers being overwritten with 0s. The write procedure for SYSTEM_MEMORY registers should actually read the value, mask it, update it and write it with the updated value. Moreover, since registers can be located in the same word, we must take care of locking the access before doing it. We should potentially use a global lock since we don't know in if register addresses aren't shared with another _CPC package but better not encourage vendors to do so. Assume that registers can use the same word inside a _CPC package and thus, use a per _CPC package lock. Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Clément Léger Link: https://patch.msgid.link/20240826101648.95654-1-cleger@rivosinc.com [ rjw: Dropped redundant semicolon ] Signed-off-by: Rafael J. Wysocki --- include/acpi/cppc_acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 930b6afba6f4..e1720d930666 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -64,6 +64,8 @@ struct cpc_desc { int cpu_id; int write_cmd_status; int write_cmd_id; + /* Lock used for RMW operations in cpc_write() */ + spinlock_t rmw_lock; struct cpc_register_resource cpc_regs[MAX_CPC_REG_ENT]; struct acpi_psd_package domain_info; struct kobject kobj; -- cgit v1.2.3 From e220917fa50774fedb27c075df2261fd664e8ca3 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 22 Aug 2024 15:50:12 +0200 Subject: mm: split a folio in minimum folio order chunks split_folio() and split_folio_to_list() assume order 0, to support minorder for non-anonymous folios, we must expand these to check the folio mapping order and use that. 
Set new_order to be at least minimum folio order if it is set in split_huge_page_to_list() so that we can maintain minimum folio order requirement in the page cache. Update the debugfs write files used for testing to ensure the order is respected as well. We simply enforce the min order when a file mapping is used. Signed-off-by: Luis Chamberlain Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20240902124931.506061-2-kernel@pankajraghav.com # folded fix Link: https://lore.kernel.org/r/20240822135018.1931258-5-kernel@pankajraghav.com Tested-by: David Howells Reviewed-by: Hannes Reinecke Reviewed-by: Zi Yan Signed-off-by: Christian Brauner --- include/linux/huge_mm.h | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e25d9ebfdf89..7a570e0437c9 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -96,6 +96,8 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \ (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order))) +#define split_folio(f) split_folio_to_list(f, NULL) + #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES #define HPAGE_PMD_SHIFT PMD_SHIFT #define HPAGE_PUD_SHIFT PUD_SHIFT @@ -317,9 +319,24 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); +int min_order_for_split(struct folio *folio); +int split_folio_to_list(struct folio *folio, struct list_head *list); static inline int split_huge_page(struct page *page) { - return split_huge_page_to_list_to_order(page, NULL, 0); + struct folio *folio = page_folio(page); + int ret = min_order_for_split(folio); + + if (ret < 0) + return ret; + + /* + * split_huge_page() locks the page before splitting and + * expects the same page that has been split to be locked when + * returned. split_folio(page_folio(page)) cannot be used here + * because it converts the page to folio and passes the head + * page to be split. + */ + return split_huge_page_to_list_to_order(page, NULL, ret); } void deferred_split_folio(struct folio *folio); @@ -484,6 +501,12 @@ static inline int split_huge_page(struct page *page) { return 0; } + +static inline int split_folio_to_list(struct folio *folio, struct list_head *list) +{ + return 0; +} + static inline void deferred_split_folio(struct folio *folio) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) @@ -598,7 +621,4 @@ static inline int split_folio_to_order(struct folio *folio, int new_order) return split_folio_to_list_to_order(folio, NULL, new_order); } -#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0) -#define split_folio(f) split_folio_to_order(f, 0) - #endif /* _LINUX_HUGE_MM_H */ -- cgit v1.2.3 From a0bbe77fafbc7e5eb41fbf3dc5cdb3608d8778a3 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 8 Aug 2024 09:11:00 +0200 Subject: dt-bindings: soc: fsl: cpm_qe: Add QUICC Engine (QE) TSA controller Add support for the time slot assigner (TSA) available in some PowerQUICC SoC that uses a QUICC Engine (QE) block such as MPC8321. This QE TSA is similar to the CPM TSA except that it uses UCCs (Unified Communication Controllers) instead of SCCs (Serial Communication Controllers). 
Also, compared against the CPM TSA, this QE TSA can handle up to 4 TDMs instead of 2 and allows to configure the logic level of sync signals. Signed-off-by: Herve Codina Reviewed-by: Rob Herring (Arm) Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20240808071132.149251-8-herve.codina@bootlin.com Signed-off-by: Christophe Leroy --- include/dt-bindings/soc/qe-fsl,tsa.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/dt-bindings/soc/qe-fsl,tsa.h (limited to 'include') diff --git a/include/dt-bindings/soc/qe-fsl,tsa.h b/include/dt-bindings/soc/qe-fsl,tsa.h new file mode 100644 index 000000000000..3cf3df9c0968 --- /dev/null +++ b/include/dt-bindings/soc/qe-fsl,tsa.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ + +#ifndef __DT_BINDINGS_SOC_FSL_QE_TSA_H +#define __DT_BINDINGS_SOC_FSL_QE_TSA_H + +#define FSL_QE_TSA_NU 0 +#define FSL_QE_TSA_UCC1 1 +#define FSL_QE_TSA_UCC2 2 +#define FSL_QE_TSA_UCC3 3 +#define FSL_QE_TSA_UCC4 4 +#define FSL_QE_TSA_UCC5 5 + +#endif -- cgit v1.2.3 From c6f39c7c165fce8fe1a41327da02dcca0a3cac25 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 8 Aug 2024 09:11:25 +0200 Subject: soc: fsl: qe: Add resource-managed muram allocators Introduce devm_cpm_muram_alloc() and devm_cpm_muram_alloc_fixed(), the resource-managed version of cpm_muram_alloc and cpm_muram_alloc_fixed(). These resource-managed versions simplify the user avoiding the need to call cpm_muram_free(). Indeed, the allocated area returned by these functions will be automatically freed on driver detach. Signed-off-by: Herve Codina Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20240808071132.149251-33-herve.codina@bootlin.com Signed-off-by: Christophe Leroy --- include/soc/fsl/qe/qe.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index af793f2a0ec4..629835b6c71d 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -23,6 +23,8 @@ #include #include +struct device; + #define QE_NUM_OF_SNUM 256 /* There are 256 serial number in QE */ #define QE_NUM_OF_BRGS 16 #define QE_NUM_OF_PORTS 1024 @@ -93,8 +95,12 @@ int cpm_muram_init(void); #if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) s32 cpm_muram_alloc(unsigned long size, unsigned long align); +s32 devm_cpm_muram_alloc(struct device *dev, unsigned long size, + unsigned long align); void cpm_muram_free(s32 offset); s32 cpm_muram_alloc_fixed(unsigned long offset, unsigned long size); +s32 devm_cpm_muram_alloc_fixed(struct device *dev, unsigned long offset, + unsigned long size); void __iomem *cpm_muram_addr(unsigned long offset); unsigned long cpm_muram_offset(const void __iomem *addr); dma_addr_t cpm_muram_dma(void __iomem *addr); @@ -106,6 +112,12 @@ static inline s32 cpm_muram_alloc(unsigned long size, return -ENOSYS; } +static inline s32 devm_cpm_muram_alloc(struct device *dev, unsigned long size, + unsigned long align) +{ + return -ENOSYS; +} + static inline void cpm_muram_free(s32 offset) { } @@ -116,6 +128,13 @@ static inline s32 cpm_muram_alloc_fixed(unsigned long offset, return -ENOSYS; } +static inline s32 devm_cpm_muram_alloc_fixed(struct device *dev, + unsigned long offset, + unsigned long size) +{ + return -ENOSYS; +} + static inline void __iomem *cpm_muram_addr(unsigned long offset) { return NULL; @@ -172,7 +191,6 @@ static inline int par_io_data_set(u8 port, u8 pin, u8 val) { return -ENOSYS; } /* * Pin multiplexing 
functions. */ -struct device; struct qe_pin; #ifdef CONFIG_QE_GPIO extern struct qe_pin *qe_pin_request(struct device *dev, int index); @@ -233,7 +251,9 @@ static inline int qe_alive_during_sleep(void) /* we actually use cpm_muram implementation, define this for convenience */ #define qe_muram_init cpm_muram_init #define qe_muram_alloc cpm_muram_alloc +#define devm_qe_muram_alloc devm_cpm_muram_alloc #define qe_muram_alloc_fixed cpm_muram_alloc_fixed +#define devm_qe_muram_alloc_fixed devm_cpm_muram_alloc_fixed #define qe_muram_free cpm_muram_free #define qe_muram_addr cpm_muram_addr #define qe_muram_offset cpm_muram_offset -- cgit v1.2.3 From f68cd02d51a65594341168f03f7962e9d9540726 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 8 Aug 2024 09:11:26 +0200 Subject: soc: fsl: qe: Add missing PUSHSCHED command The PUSHSCHED command is missing in the QE header file. This command is supported on MPC8321 and is used to modify the start address for the task running on a given peripheral. It is needed for the QMC in order to perform the re-initialization procedure and so, ensure the correct UCC setup in that case. Simply add the missing command in the commands list available in the QE header file. Signed-off-by: Herve Codina Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20240808071132.149251-34-herve.codina@bootlin.com Signed-off-by: Christophe Leroy --- include/soc/fsl/qe/qe.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 629835b6c71d..8f967d15e479 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -469,6 +469,7 @@ enum comm_dir { #define QE_QMC_STOP_TX 0x0000000c #define QE_QMC_STOP_RX 0x0000000d #define QE_SS7_SU_FIL_RESET 0x0000000e +#define QE_PUSHSCHED 0x0000000f /* jonathbr added from here down for 83xx */ #define QE_RESET_BCS 0x0000000a #define QE_MCC_INIT_TX_RX_16 0x00000003 -- cgit v1.2.3 From f46a6e16519712651e2304da03ec144cc0bb084c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 30 Aug 2024 18:26:28 +0300 Subject: usb: Add tunnel_mode parameter to usb device structure Add 'tunnel_mode' enum to usb device structure to describe if a USB3 link is tunneled over USB4, or connected directly using native USB2/USB3 protocols. Tunneled devices depend on USB4 NHI host to maintain the tunnel. Knowledge about tunneled devices is important to ensure correct suspend and resume order between USB4 hosts and tunneled devices. i.e. make sure tunnel is up before the USB device using it resumes. USB hosts such as xHCI may have vendor specific ways to detect tunneled connections. This 'tunnel_mode' parameter can be set by USB3 host driver during hcd->driver->update_device(hcd, udev) callback. tunnel_mode can be set to: USB_LINK_UNKNOWN = 0 USB_LINK_NATIVE USB_LINK_TUNNELED USB_LINK_UNKNOWN is used in case host is not capable of detecting tunneled links. 
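A hedged sketch of how a host controller driver could fill this in from its update_device callback (example_link_is_tunneled() is a made-up helper; the detection itself is vendor specific):

        static int example_update_device(struct usb_hcd *hcd, struct usb_device *udev)
        {
                if (example_link_is_tunneled(hcd, udev))
                        udev->tunnel_mode = USB_LINK_TUNNELED;
                else
                        udev->tunnel_mode = USB_LINK_NATIVE;

                return 0;
        }

Hosts that cannot tell simply leave udev->tunnel_mode as USB_LINK_UNKNOWN.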
Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240830152630.3943215-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 832997a9da0a..672d8fc2abdb 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -495,6 +495,12 @@ struct usb_dev_state; struct usb_tt; +enum usb_link_tunnel_mode { + USB_LINK_UNKNOWN = 0, + USB_LINK_NATIVE, + USB_LINK_TUNNELED, +}; + enum usb_port_connect_type { USB_PORT_CONNECT_TYPE_UNKNOWN = 0, USB_PORT_CONNECT_TYPE_HOT_PLUG, @@ -605,6 +611,7 @@ struct usb3_lpm_parameters { * WUSB devices are not, until we authorize them from user space. * FIXME -- complete doc * @authenticated: Crypto authentication passed + * @tunnel_mode: Connection native or tunneled over USB4 * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -714,6 +721,7 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; + enum usb_link_tunnel_mode tunnel_mode; int slot_id; struct usb2_lpm_parameters l1_params; -- cgit v1.2.3 From d9c61bb33fbbcbc2d5f69efa10db116dab4b56cc Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Sun, 1 Sep 2024 21:11:16 +0200 Subject: usb: gadget: function: move u_f.h to include/linux/usb/func_utils.h We move the func_utils.h header to include/linux/usb to be able to compile function drivers outside of the drivers/usb/gadget/function directory. Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20240116-ml-topic-u9p-v12-1-9a27de5160e0@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/func_utils.h | 86 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 include/linux/usb/func_utils.h (limited to 'include') diff --git a/include/linux/usb/func_utils.h b/include/linux/usb/func_utils.h new file mode 100644 index 000000000000..c8795c965109 --- /dev/null +++ b/include/linux/usb/func_utils.h @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * func_utils.h + * + * Utility definitions for USB functions + * + * Copyright (c) 2013 Samsung Electronics Co., Ltd. 
+ * http://www.samsung.com + * + * Author: Andrzej Pietrasiewicz + */ + +#ifndef _FUNC_UTILS_H_ +#define _FUNC_UTILS_H_ + +#include +#include + +/* Variable Length Array Macros **********************************************/ +#define vla_group(groupname) size_t groupname##__next = 0 +#define vla_group_size(groupname) groupname##__next + +#define vla_item(groupname, type, name, n) \ + size_t groupname##_##name##__offset = ({ \ + size_t offset = 0; \ + if (groupname##__next != SIZE_MAX) { \ + size_t align_mask = __alignof__(type) - 1; \ + size_t size = array_size(n, sizeof(type)); \ + offset = (groupname##__next + align_mask) & \ + ~align_mask; \ + if (check_add_overflow(offset, size, \ + &groupname##__next)) { \ + groupname##__next = SIZE_MAX; \ + offset = 0; \ + } \ + } \ + offset; \ + }) + +#define vla_item_with_sz(groupname, type, name, n) \ + size_t groupname##_##name##__sz = array_size(n, sizeof(type)); \ + size_t groupname##_##name##__offset = ({ \ + size_t offset = 0; \ + if (groupname##__next != SIZE_MAX) { \ + size_t align_mask = __alignof__(type) - 1; \ + offset = (groupname##__next + align_mask) & \ + ~align_mask; \ + if (check_add_overflow(offset, groupname##_##name##__sz,\ + &groupname##__next)) { \ + groupname##__next = SIZE_MAX; \ + offset = 0; \ + } \ + } \ + offset; \ + }) + +#define vla_ptr(ptr, groupname, name) \ + ((void *) ((char *)ptr + groupname##_##name##__offset)) + +struct usb_ep; +struct usb_request; + +/** + * alloc_ep_req - returns a usb_request allocated by the gadget driver and + * allocates the request's buffer. + * + * @ep: the endpoint to allocate a usb_request + * @len: usb_requests's buffer suggested size + * + * In case @ep direction is OUT, the @len will be aligned to ep's + * wMaxPacketSize. In order to avoid memory leaks or drops, *always* use + * usb_requests's length (req->length) to refer to the allocated buffer size. + * Requests allocated via alloc_ep_req() *must* be freed by free_ep_req(). + */ +struct usb_request *alloc_ep_req(struct usb_ep *ep, size_t len); + +/* Frees a usb_request previously allocated by alloc_ep_req() */ +static inline void free_ep_req(struct usb_ep *ep, struct usb_request *req) +{ + WARN_ON(req->buf == NULL); + kfree(req->buf); + req->buf = NULL; + usb_ep_free_request(ep, req); +} + +#endif /* _FUNC_UTILS_H_ */ -- cgit v1.2.3 From eaf9b2c875ece22768b78aa38da8b232e5de021b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 28 Aug 2024 11:34:02 +0200 Subject: netfilter: nf_tables: drop unused 3rd argument from validate callback ops Since commit a654de8fdc18 ("netfilter: nf_tables: fix chain dependency validation") the validate() callback no longer needs the return pointer argument. 
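With the unused pointer gone, a typical validate callback for a hypothetical expression reduces to a plain hook check, e.g.:

        static int nft_example_validate(const struct nft_ctx *ctx,
                                        const struct nft_expr *expr)
        {
                return nft_chain_validate_hooks(ctx->chain,
                                                (1 << NF_INET_LOCAL_IN) |
                                                (1 << NF_INET_FORWARD));
        }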
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +-- include/net/netfilter/nft_fib.h | 4 +--- include/net/netfilter/nft_meta.h | 3 +-- include/net/netfilter/nft_reject.h | 3 +-- 4 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1cc33d946d41..7cd60ea7cdee 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -961,8 +961,7 @@ struct nft_expr_ops { const struct nft_expr *expr, bool reset); int (*validate)(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data); + const struct nft_expr *expr); bool (*reduce)(struct nft_regs_track *track, const struct nft_expr *expr); bool (*gc)(struct net *net, diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 167640b843ef..38cae7113de4 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -21,9 +21,7 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in) int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); -int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data); - +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr); void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index ba1238f12a48..d602263590fe 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -41,8 +41,7 @@ void nft_meta_set_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr); int nft_meta_set_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data); + const struct nft_expr *expr); bool nft_meta_get_reduce(struct nft_regs_track *track, const struct nft_expr *expr); diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index 6d9ba62efd75..19060212988a 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -15,8 +15,7 @@ struct nft_reject { extern const struct nla_policy nft_reject_policy[]; int nft_reject_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data); + const struct nft_expr *expr); int nft_reject_init(const struct nft_ctx *ctx, const struct nft_expr *expr, -- cgit v1.2.3 From 85dfb34bb7d24c4585fa6a8186c3bf207470ecd7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 28 Aug 2024 21:30:16 +0100 Subject: netfilter: nf_tables: Correct spelling in nf_tables.h Correct spelling in nf_tables.h. As reported by codespell. 
Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7cd60ea7cdee..02626ad17e99 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -687,7 +687,7 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); * @NFT_SET_EXT_TIMEOUT: element timeout * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element - * @NFT_SET_EXT_EXPRESSIONS: expressions assiciated with the element + * @NFT_SET_EXT_EXPRESSIONS: expressions associated with the element * @NFT_SET_EXT_OBJREF: stateful object reference associated with element * @NFT_SET_EXT_NUM: number of extension types */ -- cgit v1.2.3 From c362646b6fc15009219020c443f5256190be6436 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 30 Aug 2024 18:23:00 +0100 Subject: netfilter: nf_tables: Add missing Kernel doc - Add missing documentation of struct field and enum items. - Add missing documentation of function parameter. Flagged by ./scripts/kernel-doc -none. No functional change intended. Compile tested only. Signed-off-by: Simon Horman Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ include/net/netfilter/nf_tproxy.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 02626ad17e99..c302b396e1a7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -209,6 +209,7 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src, * @family: protocol family * @level: depth of the chains * @report: notify via unicast netlink message + * @reg_inited: bitmap of initialised registers */ struct nft_ctx { struct net *net; @@ -313,6 +314,7 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) /** * enum nft_iter_type - nftables set iterator type * + * @NFT_ITER_UNSPEC: unspecified, to catch errors * @NFT_ITER_READ: read-only iteration over set elements * @NFT_ITER_UPDATE: iteration under mutex to update set element state */ diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index faa108b1ba67..5adf6fda11e8 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -36,6 +36,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); /** * nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections + * @net: The network namespace. * @skb: The skb being processed. * @laddr: IPv4 address to redirect to or zero. * @lport: TCP port to redirect to or zero. -- cgit v1.2.3 From beb5a9bea8239cdf4adf6b62672e30db3e9fa5ce Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:36 +0200 Subject: netdevice: convert private flags > BIT(31) to bitfields Make dev->priv_flags `u32` back and define bits higher than 31 as bitfield booleans as per Jakub's suggestion. This simplifies code which accesses these bits with no optimization loss (testb both before/after), allows to not extend &netdev_priv_flags each time, but also scales better as bits > 63 in the future would only add a new u64 to the structure with no complications, comparing to that extending ::priv_flags would require converting it to a bitmap. 
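For the two flags that lived above bit 31, the conversion for their users is mechanical; an illustrative before/after (driver-side sketch):

        /* was: dev->priv_flags |= IFF_SEE_ALL_HWTSTAMP_REQUESTS; */
        dev->see_all_hwtstamp_requests = true;

        /* was: if (dev->priv_flags & IFF_CHANGE_PROTO_DOWN) */
        if (dev->change_proto_down)
                netdev_info(dev, "carrier can be toggled via IFLA_PROTO_DOWN\n");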
Note that I picked `unsigned long :1` to not lose any potential optimizations comparing to `bool :1` etc. Suggested-by: Jakub Kicinski Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fce70990b209..d6f35c9d8580 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1613,7 +1613,8 @@ struct net_device_ops { * userspace; this means that the order of these flags can change * during any kernel release. * - * You should have a pretty good reason to be extending these flags. + * You should add bitfield booleans after either net_device::priv_flags + * (hotpath) or ::threaded (slowpath) instead of extending these flags. * * @IFF_802_1Q_VLAN: 802.1Q VLAN device * @IFF_EBRIDGE: Ethernet bridging device @@ -1652,10 +1653,6 @@ struct net_device_ops { * @IFF_NO_ADDRCONF: prevent ipv6 addrconf * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) - * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN - * @IFF_SEE_ALL_HWTSTAMP_REQUESTS: device wants to see calls to - * ndo_hwtstamp_set() for all timestamp requests regardless of source, - * even if those aren't HWTSTAMP_SOURCE_NETDEV. */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1690,8 +1687,6 @@ enum netdev_priv_flags { IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), - IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), - IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; /* Specifies the type of the struct net_device::ml_priv pointer */ @@ -1723,6 +1718,9 @@ enum netdev_reg_state { * data with strictly "high-level" data, and it has to know about * almost every data structure used in the INET module. * + * @priv_flags: flags invisible to userspace defined as bits, see + * enum netdev_priv_flags for the definitions + * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name * of the interface. @@ -1789,8 +1787,6 @@ enum netdev_reg_state { * * @flags: Interface flags (a la BSD) * @xdp_features: XDP capability supported by the device - * @priv_flags: Like 'flags' but invisible to userspace, - * see if.h for the definitions * @gflags: Global flags ( kept as legacy ) * @priv_len: Size of the ->priv flexible array * @priv: Flexible array containing private data @@ -1964,6 +1960,12 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @see_all_hwtstamp_requests: device wants to see calls to + * ndo_hwtstamp_set() for all timestamp requests + * regardless of source, even if those aren't + * HWTSTAMP_SOURCE_NETDEV + * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. 
@@ -2014,7 +2016,9 @@ struct net_device { /* TX read-mostly hotpath */ __cacheline_group_begin(net_device_read_tx); - unsigned long long priv_flags; + struct_group(priv_flags_fast, + unsigned long priv_flags:32; + ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; struct netdev_queue *_tx; @@ -2350,6 +2354,10 @@ struct net_device { bool proto_down; bool threaded; + /* priv_flags_slow, ungrouped to save space */ + unsigned long see_all_hwtstamp_requests:1; + unsigned long change_proto_down:1; + struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) -- cgit v1.2.3 From 00d066a4d4edbe559ba6c35153da71d4b2b8a383 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:37 +0200 Subject: netdev_features: convert NETIF_F_LLTX to dev->lltx NETIF_F_LLTX can't be changed via Ethtool and is not a feature, rather an attribute, very similar to IFF_NO_QUEUE (and hot). Free one netdev_features_t bit and make it a "hot" private flag. Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdev_features.h | 6 ++---- include/linux/netdevice.h | 10 +++++++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7c2d77d75a88..a2e20b517584 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -24,8 +24,7 @@ enum { NETIF_F_HW_VLAN_CTAG_FILTER_BIT,/* Receive filtering on VLAN CTAGs */ NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ NETIF_F_GSO_BIT, /* Enable software GSO. */ - NETIF_F_LLTX_BIT, /* LockLess TX - deprecated. Please */ - /* do not use LLTX in new drivers */ + __UNUSED_NETIF_F_12, NETIF_F_NETNS_LOCAL_BIT, /* Does not change network namespaces */ NETIF_F_GRO_BIT, /* Generic receive offload */ NETIF_F_LRO_BIT, /* large receive offload */ @@ -120,7 +119,6 @@ enum { #define NETIF_F_HW_VLAN_CTAG_TX __NETIF_F(HW_VLAN_CTAG_TX) #define NETIF_F_IP_CSUM __NETIF_F(IP_CSUM) #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) -#define NETIF_F_LLTX __NETIF_F(LLTX) #define NETIF_F_LOOPBACK __NETIF_F(LOOPBACK) #define NETIF_F_LRO __NETIF_F(LRO) #define NETIF_F_NETNS_LOCAL __NETIF_F(NETNS_LOCAL) @@ -193,7 +191,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) + NETIF_F_NETNS_LOCAL) /* remember that ((t)1 << t_BITS) is undefined in C99 */ #define NETIF_F_ETHTOOL_BITS ((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d6f35c9d8580..e22ca5e07490 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1720,6 +1720,9 @@ enum netdev_reg_state { * * @priv_flags: flags invisible to userspace defined as bits, see * enum netdev_priv_flags for the definitions + * @lltx: device supports lockless Tx. Deprecated for real HW + * drivers. Mainly used by logical interfaces, such as + * bonding and tunnels * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). 
It is the name @@ -2018,6 +2021,7 @@ struct net_device { __cacheline_group_begin(net_device_read_tx); struct_group(priv_flags_fast, unsigned long priv_flags:32; + unsigned long lltx:1; ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; @@ -4433,7 +4437,7 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } #define HARD_TX_LOCK(dev, txq, cpu) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ + if (!(dev)->lltx) { \ __netif_tx_lock(txq, cpu); \ } else { \ __netif_tx_acquire(txq); \ @@ -4441,12 +4445,12 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } #define HARD_TX_TRYLOCK(dev, txq) \ - (((dev->features & NETIF_F_LLTX) == 0) ? \ + (!(dev)->lltx ? \ __netif_tx_trylock(txq) : \ __netif_tx_acquire(txq)) #define HARD_TX_UNLOCK(dev, txq) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ + if (!(dev)->lltx) { \ __netif_tx_unlock(txq); \ } else { \ __netif_tx_release(txq); \ -- cgit v1.2.3 From 05c1280a2bcfca187fe7fa90bb240602cf54af0a Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:38 +0200 Subject: netdev_features: convert NETIF_F_NETNS_LOCAL to dev->netns_local "Interface can't change network namespaces" is rather an attribute, not a feature, and it can't be changed via Ethtool. Make it a "cold" private flag instead of a netdev_feature and free one more bit. Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdev_features.h | 6 ++---- include/linux/netdevice.h | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index a2e20b517584..d5a3836f4793 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -25,7 +25,7 @@ enum { NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ NETIF_F_GSO_BIT, /* Enable software GSO. 
*/ __UNUSED_NETIF_F_12, - NETIF_F_NETNS_LOCAL_BIT, /* Does not change network namespaces */ + __UNUSED_NETIF_F_13, NETIF_F_GRO_BIT, /* Generic receive offload */ NETIF_F_LRO_BIT, /* large receive offload */ @@ -121,7 +121,6 @@ enum { #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) #define NETIF_F_LOOPBACK __NETIF_F(LOOPBACK) #define NETIF_F_LRO __NETIF_F(LRO) -#define NETIF_F_NETNS_LOCAL __NETIF_F(NETNS_LOCAL) #define NETIF_F_NOCACHE_COPY __NETIF_F(NOCACHE_COPY) #define NETIF_F_NTUPLE __NETIF_F(NTUPLE) #define NETIF_F_RXCSUM __NETIF_F(RXCSUM) @@ -190,8 +189,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -#define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_NETNS_LOCAL) +#define NETIF_F_NEVER_CHANGE NETIF_F_VLAN_CHALLENGED /* remember that ((t)1 << t_BITS) is undefined in C99 */ #define NETIF_F_ETHTOOL_BITS ((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e22ca5e07490..a698e2402420 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1968,6 +1968,7 @@ enum netdev_reg_state { * regardless of source, even if those aren't * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN + * @netns_local: interface can't change network namespaces * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved @@ -2361,6 +2362,7 @@ struct net_device { /* priv_flags_slow, ungrouped to save space */ unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; + unsigned long netns_local:1; struct list_head net_notifier_list; -- cgit v1.2.3 From 782dbbf589cd9082effaec522e3f1b4ce1594803 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:39 +0200 Subject: netdev_features: convert NETIF_F_FCOE_MTU to dev->fcoe_mtu Ability to handle maximum FCoE frames of 2158 bytes can never be changed and thus more of an attribute, not a toggleable feature. Move it from netdev_features_t to "cold" priv flags (bitfield bool) and free yet another feature bit. 
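A minimal sketch of the driver-side conversion this implies, assuming an FCoE-capable driver's netdev setup path (the actual .c changes are outside this 'include'-only excerpt):

	/* before: advertised through the feature bitmap */
	/* netdev->features |= NETIF_F_FCOE_MTU; */

	/* after: recorded once as a "cold" private flag */
	netdev->fcoe_mtu = true;

Read sides correspondingly go from testing NETIF_F_FCOE_MTU in netdev->features to testing netdev->fcoe_mtu directly.
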
Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdev_features.h | 6 ++---- include/linux/netdevice.h | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index d5a3836f4793..37af2c6e7caf 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -58,7 +58,7 @@ enum { NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ - NETIF_F_FCOE_MTU_BIT, /* Supports max FCoE MTU, 2158 bytes*/ + __UNUSED_NETIF_F_37, NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */ @@ -105,7 +105,6 @@ enum { #define __NETIF_F(name) __NETIF_F_BIT(NETIF_F_##name##_BIT) #define NETIF_F_FCOE_CRC __NETIF_F(FCOE_CRC) -#define NETIF_F_FCOE_MTU __NETIF_F(FCOE_MTU) #define NETIF_F_FRAGLIST __NETIF_F(FRAGLIST) #define NETIF_F_FSO __NETIF_F(FSO) #define NETIF_F_GRO __NETIF_F(GRO) @@ -210,8 +209,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | \ NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) -#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ - NETIF_F_FSO) +#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FSO) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a698e2402420..ca5f0dda733b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1969,6 +1969,7 @@ enum netdev_reg_state { * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN * @netns_local: interface can't change network namespaces + * @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved @@ -2363,6 +2364,7 @@ struct net_device { unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; unsigned long netns_local:1; + unsigned long fcoe_mtu:1; struct list_head net_notifier_list; -- cgit v1.2.3 From a61fec1c87be682b5c0fdba6074649c469c675a3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:40 +0200 Subject: netdev_features: remove NETIF_F_ALL_FCOE NETIF_F_ALL_FCOE is used only in vlan_dev.c, 2 times. Now that it's only 2 bits, open-code it and remove the definition from netdev_features.h. Suggested-by: Jakub Kicinski Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdev_features.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 37af2c6e7caf..66e7d26b70a4 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -209,8 +209,6 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | \ NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) -#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FSO) - /* List of features with software fallbacks. 
*/ #define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST) -- cgit v1.2.3 From 2c9ffe872e64f5aec12f2ff6cec1b7542569a88f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 3 Sep 2024 09:21:44 +0100 Subject: wifi: cfg80211: wext: Update spelling and grammar Correct spelling in iw_handler.h. As reported by codespell. Also, while the "few shortcomings" line is being updated, correct its grammar. Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240903-wifi-spell-v2-1-bfcf7062face@kernel.org Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index b2cf243ebe44..7af1082ea9a0 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -23,7 +23,7 @@ * to handle wireless statistics. * * The initial APIs served us well and has proven a reasonably good design. - * However, there is a few shortcommings : + * However, there are a few shortcomings : * o No events, everything is a request to the driver. * o Large ioctl function in driver with gigantic switch statement * (i.e. spaghetti code). @@ -38,13 +38,13 @@ * ------------------------------- * The new driver API is just a bunch of standard functions (handlers), * each handling a specific Wireless Extension. The driver just export - * the list of handler it supports, and those will be called apropriately. + * the list of handler it supports, and those will be called appropriately. * * I tried to keep the main advantage of the previous API (simplicity, * efficiency and light weight), and also I provide a good dose of backward * compatibility (most structures are the same, driver can use both API * simultaneously, ...). - * Hopefully, I've also addressed the shortcomming of the initial API. + * Hopefully, I've also addressed the shortcoming of the initial API. * * The advantage of the new API are : * o Handling of Extensions in driver broken in small contained functions @@ -84,7 +84,7 @@ /* ---------------------- THE IMPLEMENTATION ---------------------- */ /* - * Some of the choice I've made are pretty controversials. Defining an + * Some of the choice I've made are pretty controversial. Defining an * API is very much weighting compromises. This goes into some of the * details and the thinking behind the implementation. * @@ -140,7 +140,7 @@ * example to distinguish setting max rate and basic rate), I would * break the prototype. Using iwreq_data is more flexible. * 3) Also, the above form is not generic (see above). - * 4) I don't expect driver developper using the wrong field of the + * 4) I don't expect driver developer using the wrong field of the * union (Doh !), so static typechecking doesn't add much value. 
* 5) Lastly, you can skip the union by doing : * static int mydriver_ioctl_setrate(struct net_device *dev, @@ -459,7 +459,7 @@ int iw_handler_get_thrspy(struct net_device *dev, struct iw_request_info *info, void wireless_spy_update(struct net_device *dev, unsigned char *address, struct iw_quality *wstats); -/************************* INLINE FUNTIONS *************************/ +/************************* INLINE FUNCTIONS *************************/ /* * Function that are so simple that it's more efficient inlining them */ -- cgit v1.2.3 From b45ed06f46737f8c2ee65698f4305409f2386674 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 13 Aug 2024 22:19:32 +0800 Subject: drivers/base: Introduce device_match_t for device finding APIs There are several drivers/base APIs for finding a specific device, and they currently use the following good type for the @match parameter: int (*match)(struct device *dev, const void *data) Since these operations do not modify the caller-provided @*data, this type is worthy of a dedicated typedef: typedef int (*device_match_t)(struct device *dev, const void *data) Advantages of using device_match_t: - Shorter API declarations and definitions - Prevent further APIs from using a bad type for @match So introduce device_match_t and apply it to the existing (bus|class|driver|auxiliary)_find_device() APIs. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240813-dev_match_api-v3-1-6c6878a99b9f@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 2 +- include/linux/device/bus.h | 6 ++++-- include/linux/device/class.h | 2 +- include/linux/device/driver.h | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 662b8ae54b6a..31762324bcc9 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -271,6 +271,6 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); struct auxiliary_device *auxiliary_find_device(struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)); + device_match_t match); #endif /* _AUXILIARY_BUS_H_ */ diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 807831d6bf0f..cdc4757217f9 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -126,6 +126,9 @@ struct bus_attribute { int __must_check bus_create_file(const struct bus_type *bus, struct bus_attribute *attr); void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr); +/* Matching function type for drivers/base APIs to find a specific device */ +typedef int (*device_match_t)(struct device *dev, const void *data); + /* Generic device matching functions that all busses can use to match with */ int device_match_name(struct device *dev, const void *name); int device_match_of_node(struct device *dev, const void *np); @@ -139,8 +142,7 @@ int device_match_any(struct device *dev, const void *unused); int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *bus_find_device(const struct bus_type *bus, struct device *start, - const void *data, - int (*match)(struct device *dev, const void *data)); + const void *data, device_match_t match); /** * bus_find_device_by_name - device iterator for locating a particular device * of a specific name. 
diff --git a/include/linux/device/class.h b/include/linux/device/class.h index c576b49c55c2..518c9c83d64b 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -95,7 +95,7 @@ void class_dev_iter_exit(struct class_dev_iter *iter); int class_for_each_device(const struct class *class, const struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *class_find_device(const struct class *class, const struct device *start, - const void *data, int (*match)(struct device *, const void *)); + const void *data, device_match_t match); /** * class_find_device_by_name - device iterator for locating a particular device diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 1fc8b68786de..5c04b8e3833b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -157,7 +157,7 @@ int __must_check driver_for_each_device(struct device_driver *drv, struct device void *data, int (*fn)(struct device *dev, void *)); struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)); + device_match_t match); /** * driver_find_device_by_name - device iterator for locating a particular device -- cgit v1.2.3 From 24e041e1e48d06f25a12caaf73728a4ec2e511fe Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 23 Aug 2024 15:55:44 +0800 Subject: platform: Make platform_bus_type constant Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the platform_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240823075544.144426-1-kunwu.chan@linux.dev Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index d422db6eec63..7132623e4658 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -52,7 +52,7 @@ struct platform_device { extern int platform_device_register(struct platform_device *); extern void platform_device_unregister(struct platform_device *); -extern struct bus_type platform_bus_type; +extern const struct bus_type platform_bus_type; extern struct device platform_bus; extern struct resource *platform_get_resource(struct platform_device *, -- cgit v1.2.3 From 8064952c65045f05ee2671fe437770e50c151776 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Thu, 22 Aug 2024 15:28:04 -0500 Subject: driver core: shut down devices asynchronously Add code to allow asynchronous shutdown of devices, ensuring that each device is shut down before its parents & suppliers. Only devices with drivers that have async_shutdown_enable enabled will be shut down asynchronously. This can dramatically reduce system shutdown/reboot time on systems that have multiple devices that take many seconds to shut down (like certain NVMe drives). On one system tested, the shutdown time went from 11 minutes without this patch to 55 seconds with the patch. 
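A hedged opt-in sketch using a made-up PCI driver; only the new async_shutdown_enable flag comes from this patch, everything else is illustrative:

	static struct pci_driver example_ssd_driver = {
		.name		= "example_ssd",
		.id_table	= example_ssd_ids,
		.probe		= example_ssd_probe,
		.remove		= example_ssd_remove,
		.shutdown	= example_ssd_shutdown,
		.driver = {
			/* shut down this driver's devices asynchronously */
			.async_shutdown_enable = true,
		},
	};

Devices whose drivers leave the flag clear keep the existing synchronous shutdown ordering.
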
Signed-off-by: Stuart Hayes Signed-off-by: David Jeffery Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Keith Busch Tested-by: Keith Busch Link: https://lore.kernel.org/r/20240822202805.6379-4-stuart.w.hayes@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 5c04b8e3833b..14c9211b82d6 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -56,6 +56,7 @@ enum probe_type { * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. * @probe_type: Type of the probe (synchronous or asynchronous) to use. + * @async_shutdown_enable: Enables devices to be shutdown asynchronously. * @of_match_table: The open firmware table. * @acpi_match_table: The ACPI match table. * @probe: Called to query the existence of a specific device, @@ -102,6 +103,7 @@ struct device_driver { bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ enum probe_type probe_type; + bool async_shutdown_enable; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; -- cgit v1.2.3 From 8ab0f4605d5ce3c0d386b3828b07719f1e8e0505 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 23 Aug 2024 14:24:40 +0800 Subject: bus: fsl-mc: make fsl_mc_bus_type const Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the fsl_mc_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Acked-by: Christophe Leroy # for Link: https://lore.kernel.org/r/20240823062440.113628-1-kunwu.chan@linux.dev Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 083c860fd28e..c90ec889bfc2 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -436,7 +436,7 @@ void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev); struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, u16 if_id); -extern struct bus_type fsl_mc_bus_type; +extern const struct bus_type fsl_mc_bus_type; extern struct device_type fsl_mc_bus_dprc_type; extern struct device_type fsl_mc_bus_dpni_type; -- cgit v1.2.3 From 4c0a6a0ac902dbf184ae7be1f1fce225cc0b7380 Mon Sep 17 00:00:00 2001 From: Chanwoo Lee Date: Thu, 29 Aug 2024 11:47:09 +0900 Subject: mmc: core: Replace the argument of mmc_sd_switch() with defines Replace with already defined values for readability. While at it, let's also change the mode-parameter from an int to bool, as the only used values are 0 or 1. 
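A hedged call-site sketch of what the new prototype implies; the macro and variable names below are assumptions, since the defines and the callers live outside this 'include'-only hunk:

	/* before: bare magic number selecting check (0) vs. set (1) mode */
	/* err = mmc_sd_switch(card, 1, 0, bus_speed, status); */

	/* after: mode is a bool chosen via a named define, e.g. something
	 * along the lines of SD_SWITCH_SET / SD_SWITCH_CHECK (names assumed)
	 */
	err = mmc_sd_switch(card, SD_SWITCH_SET, 0, bus_speed, status);
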
Signed-off-by: Chanwoo Lee Link: https://lore.kernel.org/r/20240829024709.402285-1-cw9316.lee@samsung.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ac01cd1622ef..6a31ed02d3ff 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -648,7 +648,8 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host, host->err_stats[stat] += 1; } -int mmc_sd_switch(struct mmc_card *card, int mode, int group, u8 value, u8 *resp); +int mmc_sd_switch(struct mmc_card *card, bool mode, int group, + u8 value, u8 *resp); int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); -- cgit v1.2.3 From 7df7c204c678e24cd32d33360538670b7b90e330 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Thu, 22 Aug 2024 15:50:18 +0200 Subject: xfs: enable block size larger than page size support Page cache now has the ability to have a minimum order when allocating a folio which is a prerequisite to add support for block size > page size. Signed-off-by: Pankaj Raghav Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240827-xfs-fix-wformat-bs-gt-ps-v1-1-aec6717609e0@kernel.org # fix folded Link: https://lore.kernel.org/r/20240822135018.1931258-11-kernel@pankajraghav.com Reviewed-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4cc170949e9c..55b254d951da 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -374,6 +374,19 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) #define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) #define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) +/* + * mapping_max_folio_size_supported() - Check the max folio size supported + * + * The filesystem should call this function at mount time if there is a + * requirement on the folio mapping size in the page cache. + */ +static inline size_t mapping_max_folio_size_supported(void) +{ + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER); + return PAGE_SIZE; +} + /* * mapping_set_folio_order_range() - Set the orders supported by a file. * @mapping: The address space of the file. -- cgit v1.2.3 From 31754ea6cbbc08d5bbe1fa290320c3048d8d98a3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 27 Aug 2024 06:51:36 -0400 Subject: iomap: add a private argument for iomap_file_buffered_write In order to switch fuse over to using iomap for buffered writes we need to be able to have the struct file for the original write, in case we have to read in the page to make it uptodate. Handle this by using the existing private field in the iomap_iter, and add the argument to iomap_file_buffered_write. This will allow us to pass the file in through the iomap buffered write path, and is flexible for any other file systems needs. 
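A sketch of the resulting call-site shape, assuming a conventional ->write_iter() implementation with illustrative names; existing iomap users would simply pass NULL, while fuse could pass something that lets its iomap_begin reach the original struct file:

	static ssize_t example_file_write_iter(struct kiocb *iocb,
					       struct iov_iter *from)
	{
		/* classic filesystem: no per-write private data needed */
		return iomap_file_buffered_write(iocb, from,
						 &example_iomap_ops, NULL);
	}

The pointer handed in here lands in the iomap iterator's private field, where the filesystem's iomap operations can pick it up.
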
Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/7f55c7c32275004ba00cddf862d970e6e633f750.1724755651.git.josef@toxicpanda.com Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6fc1c858013d..f792b37f7627 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -257,7 +257,7 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) } ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, - const struct iomap_ops *ops); + const struct iomap_ops *ops, void *private); int iomap_file_buffered_write_punch_delalloc(struct inode *inode, struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, int (*punch)(struct inode *inode, loff_t pos, loff_t length)); -- cgit v1.2.3 From 6f634eb080161baa4811a39f5ec07923ede092bc Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Tue, 27 Aug 2024 10:42:07 +0200 Subject: filemap: fix htmldoc warning for mapping_align_index() Stephen reported that there is a kernel build warning due to a missing description of a parameter in mapping_align_index(). Add the missing index parameter in the comment description. Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20240827084206.106347-2-kernel@pankajraghav.com Fixes: ab95d23bab22 ("filemap: allocate mapping_min_order folios in the page cache") Reported-by: Stephen Rothwell Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 55b254d951da..6c0dde447c98 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -470,6 +470,7 @@ mapping_min_folio_nrpages(struct address_space *mapping) /** * mapping_align_index() - Align index for this mapping. * @mapping: The address_space. + * @index: The page index. * * The index of a folio must be naturally aligned. If you are adding a * new folio to the page cache and need to know what index to give it, -- cgit v1.2.3 From 4686cc598f669dea1b50dde1568e6c65c355bc67 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2024 00:25:51 +0200 Subject: sched: Clean up DL server vs core sched Abide by the simple rule: pick_next_task() := pick_task() + set_next_task(.first = true) This allows us to trivially get rid of server_pick_next() and things collapse nicely. Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240813224015.837303391@infradead.org --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3709dedbab59..57cf27a3045c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -692,7 +692,6 @@ struct sched_dl_entity { */ struct rq *rq; dl_server_has_tasks_f server_has_tasks; - dl_server_pick_f server_pick_next; dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES -- cgit v1.2.3 From 73d3c04b710f0c144ce873dfe4f173a55c749539 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:08:58 +0200 Subject: netfilter: nf_tables: annotate data-races around element expiration element expiration can be read-write locklessly, it can be written by dynset and read from netlink dump, add annotation. 
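For context, a sketch of the writer side these READ_ONCE() annotations pair with; the dynset update itself is outside this 'include'-only diff, so the exact expression and the timeout variable are assumptions:

	/* dynset refresh path: publish the new expiry without the set
	 * lock held, paired with READ_ONCE() on the netlink dump side.
	 */
	WRITE_ONCE(*nft_set_ext_expiration(ext),
		   get_jiffies_64() + timeout);
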
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c302b396e1a7..1528af3fe26f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -835,7 +835,7 @@ static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, u64 tstamp) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_after_eq64(tstamp, *nft_set_ext_expiration(ext)); + time_after_eq64(tstamp, READ_ONCE(*nft_set_ext_expiration(ext))); } static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) -- cgit v1.2.3 From 4c5daea9af4fce6628b8ca9e6a332529bbf26809 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:09:17 +0200 Subject: netfilter: nf_tables: consolidate timeout extension for elements Expiration and timeout are stored in separated set element extensions, but they are tightly coupled. Consolidate them in a single extension to simplify and prepare for set element updates. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1528af3fe26f..1e9b5e1659a1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -687,7 +687,6 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); * @NFT_SET_EXT_DATA: mapping data * @NFT_SET_EXT_FLAGS: element flags * @NFT_SET_EXT_TIMEOUT: element timeout - * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_EXPRESSIONS: expressions associated with the element * @NFT_SET_EXT_OBJREF: stateful object reference associated with element @@ -699,7 +698,6 @@ enum nft_set_extensions { NFT_SET_EXT_DATA, NFT_SET_EXT_FLAGS, NFT_SET_EXT_TIMEOUT, - NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_USERDATA, NFT_SET_EXT_EXPRESSIONS, NFT_SET_EXT_OBJREF, @@ -811,14 +809,14 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_FLAGS); } -static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext) -{ - return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); -} +struct nft_timeout { + u64 timeout; + u64 expiration; +}; -static inline u64 *nft_set_ext_expiration(const struct nft_set_ext *ext) +static inline struct nft_timeout *nft_set_ext_timeout(const struct nft_set_ext *ext) { - return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); + return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); } static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext) @@ -834,8 +832,8 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, u64 tstamp) { - return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_after_eq64(tstamp, READ_ONCE(*nft_set_ext_expiration(ext))); + return nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && + time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration)); } static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) -- cgit v1.2.3 From 8bfb74ae12fa4cd3c9b49bb5913610b5709bffd7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:09:27 +0200 Subject: netfilter: nf_tables: zero timeout means element never 
times out This patch uses zero as timeout marker for those elements that never expire when the element is created. If userspace provides no timeout for an element, then the default set timeout applies. However, if no default set timeout is specified and timeout flag is set on, then timeout extension is allocated and timeout is set to zero to allow for future updates. Use of zero a never timeout marker has been suggested by Phil Sutter. Note that, in older kernels, it is already possible to define elements that never expire by declaring a set with the set timeout flag set on and no global set timeout, in this case, new element with no explicit timeout never expire do not allocate the timeout extension, hence, they never expire. This approach makes it complicated to accomodate element timeout update, because element extensions do not support reallocations. Therefore, allocate the timeout extension and use the new marker for this case, but do not expose it to userspace to retain backward compatibility in the set listing. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 +++++-- include/uapi/linux/netfilter/nf_tables.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1e9b5e1659a1..7511918dce6f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -832,8 +832,11 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, u64 tstamp) { - return nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && - time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration)); + if (!nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) || + nft_set_ext_timeout(ext)->timeout == 0) + return false; + + return time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration)); } static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 639894ed1b97..d6476ca5d7a6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -436,7 +436,7 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) - * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64) + * @NFTA_SET_ELEM_TIMEOUT: timeout value, zero means never times out (NLA_U64) * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes) -- cgit v1.2.3 From 4201f3938914d8df3c761754b9726770c4225d66 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:09:58 +0200 Subject: netfilter: nf_tables: set element timeout update support Store new timeout and expiration in transaction object, use them to update elements from .commit path. Otherwise, discard update if .abort path is exercised. Use update_flags in the transaction to note whether the timeout, expiration, or both need to be updated. Annotate access to timeout extension now that it can be updated while lockless read access is possible. Reject timeout updates on elements with no timeout extension. Element transaction remains in the 96 bytes kmalloc slab on x86_64 after this update. 
This patch requires ("netfilter: nf_tables: use timestamp to check for set element timeout") to make sure an element does not expire while transaction is ongoing. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7511918dce6f..49708e7e1339 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -833,7 +833,7 @@ static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, u64 tstamp) { if (!nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) || - nft_set_ext_timeout(ext)->timeout == 0) + READ_ONCE(nft_set_ext_timeout(ext)->timeout) == 0) return false; return time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration)); @@ -1749,10 +1749,18 @@ struct nft_trans_table { #define nft_trans_table_update(trans) \ nft_trans_container_table(trans)->update +enum nft_trans_elem_flags { + NFT_TRANS_UPD_TIMEOUT = (1 << 0), + NFT_TRANS_UPD_EXPIRATION = (1 << 1), +}; + struct nft_trans_elem { struct nft_trans nft_trans; struct nft_set *set; struct nft_elem_priv *elem_priv; + u64 timeout; + u64 expiration; + u8 update_flags; bool bound; }; @@ -1762,6 +1770,12 @@ struct nft_trans_elem { nft_trans_container_elem(trans)->set #define nft_trans_elem_priv(trans) \ nft_trans_container_elem(trans)->elem_priv +#define nft_trans_elem_update_flags(trans) \ + nft_trans_container_elem(trans)->update_flags +#define nft_trans_elem_timeout(trans) \ + nft_trans_container_elem(trans)->timeout +#define nft_trans_elem_expiration(trans) \ + nft_trans_container_elem(trans)->expiration #define nft_trans_elem_set_bound(trans) \ nft_trans_container_elem(trans)->bound -- cgit v1.2.3 From 631598c419985327110135efb696d9f9fe67bffd Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 14 Aug 2024 11:26:29 +0200 Subject: iio: dac: ad5449: drop support for platform data There are no longer any users of the platform data struct. Remove support for it from the driver. Signed-off-by: Bartosz Golaszewski Link: https://patch.msgid.link/20240814092629.9862-1-brgl@bgdev.pl Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad5449.h | 39 ------------------------------------ 1 file changed, 39 deletions(-) delete mode 100644 include/linux/platform_data/ad5449.h (limited to 'include') diff --git a/include/linux/platform_data/ad5449.h b/include/linux/platform_data/ad5449.h deleted file mode 100644 index d687ef5726c2..000000000000 --- a/include/linux/platform_data/ad5449.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AD5415, AD5426, AD5429, AD5432, AD5439, AD5443, AD5449 Digital to Analog - * Converter driver. - * - * Copyright 2012 Analog Devices Inc. - * Author: Lars-Peter Clausen - */ - -#ifndef __LINUX_PLATFORM_DATA_AD5449_H__ -#define __LINUX_PLATFORM_DATA_AD5449_H__ - -/** - * enum ad5449_sdo_mode - AD5449 SDO pin configuration - * @AD5449_SDO_DRIVE_FULL: Drive the SDO pin with full strength. - * @AD5449_SDO_DRIVE_WEAK: Drive the SDO pin with not full strength. - * @AD5449_SDO_OPEN_DRAIN: Operate the SDO pin in open-drain mode. - * @AD5449_SDO_DISABLED: Disable the SDO pin, in this mode it is not possible to - * read back from the device. 
- */ -enum ad5449_sdo_mode { - AD5449_SDO_DRIVE_FULL = 0x0, - AD5449_SDO_DRIVE_WEAK = 0x1, - AD5449_SDO_OPEN_DRAIN = 0x2, - AD5449_SDO_DISABLED = 0x3, -}; - -/** - * struct ad5449_platform_data - Platform data for the ad5449 DAC driver - * @sdo_mode: SDO pin mode - * @hardware_clear_to_midscale: Whether asserting the hardware CLR pin sets the - * outputs to midscale (true) or to zero scale(false). - */ -struct ad5449_platform_data { - enum ad5449_sdo_mode sdo_mode; - bool hardware_clear_to_midscale; -}; - -#endif -- cgit v1.2.3 From 33f339a1ba54e56bba57ee9a77c71e385ab4825c Mon Sep 17 00:00:00 2001 From: Tze-nan Wu Date: Fri, 30 Aug 2024 16:25:17 +0800 Subject: bpf, net: Fix a potential race in do_sock_getsockopt() There's a potential race when `cgroup_bpf_enabled(CGROUP_GETSOCKOPT)` is false during the execution of `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN`, but becomes true when `BPF_CGROUP_RUN_PROG_GETSOCKOPT` is called. This inconsistency can lead to `BPF_CGROUP_RUN_PROG_GETSOCKOPT` receiving an "-EFAULT" from `__cgroup_bpf_run_filter_getsockopt(max_optlen=0)`. Scenario shown as below: `process A` `process B` ----------- ------------ BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN enable CGROUP_GETSOCKOPT BPF_CGROUP_RUN_PROG_GETSOCKOPT (-EFAULT) To resolve this, remove the `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN` macro and directly uses `copy_from_sockptr` to ensure that `max_optlen` is always set before `BPF_CGROUP_RUN_PROG_GETSOCKOPT` is invoked. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Co-developed-by: Yanghui Li Signed-off-by: Yanghui Li Co-developed-by: Cheng-Jui Wang Signed-off-by: Cheng-Jui Wang Signed-off-by: Tze-nan Wu Acked-by: Stanislav Fomichev Acked-by: Alexei Starovoitov Link: https://patch.msgid.link/20240830082518.23243-1-Tze-nan.Wu@mediatek.com Signed-off-by: Jakub Kicinski --- include/linux/bpf-cgroup.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index fb3c3e7181e6..ce91d9b2acb9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -390,14 +390,6 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ - copy_from_sockptr(&__ret, optlen, sizeof(int)); \ - __ret; \ -}) - #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ max_optlen, retval) \ ({ \ @@ -518,7 +510,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ -- cgit v1.2.3 From 7cb9b5fa218caa899f4288865c4598357bcce4e9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 3 Sep 2024 15:38:25 -0500 Subject: PCI: endpoint: Fix enum pci_epc_bar_type kerneldoc e01c9797c0eb ("PCI: endpoint: Clean up hardware description for BARs") added enum pci_epc_bar_type with incomplete kerneldoc. Add the missing piece. 
Signed-off-by: Bjorn Helgaas --- include/linux/pci-epc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 85bdf2adb760..697cc190747d 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -149,6 +149,7 @@ struct pci_epc { }; /** + * enum pci_epc_bar_type - configurability of endpoint BAR * @BAR_PROGRAMMABLE: The BAR mask can be configured by the EPC. * @BAR_FIXED: The BAR mask is fixed by the hardware. * @BAR_RESERVED: The BAR should not be touched by an EPF driver. -- cgit v1.2.3 From 364ea7ccaef917a3068236a19a4b31a0623b561a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 31 Aug 2024 16:20:39 +0200 Subject: power: supply: Change usb_types from an array into a bitmask The bit_types array just hold a list of valid enum power_supply_usb_type values which map to 0 - 9. This can easily be represented as a bitmap. This reduces the size of struct power_supply_desc and further reduces the data section size by drivers no longer needing to store the array. This also unifies how usb_types are handled with charge_behaviours, which allows power_supply_show_usb_type() to be removed. Signed-off-by: Hans de Goede Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240831142039.28830-7-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 72dc7e45c90c..910d407ebe63 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -243,8 +243,7 @@ struct power_supply_desc { const char *name; enum power_supply_type type; u8 charge_behaviours; - const enum power_supply_usb_type *usb_types; - size_t num_usb_types; + u32 usb_types; const enum power_supply_property *properties; size_t num_properties; -- cgit v1.2.3 From 8f62819aaace77dd85037ae766eb767f8c4417ce Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Aug 2024 11:33:23 +0200 Subject: PCI/pwrctl: Rescan bus on a separate thread If we trigger the bus rescan from sysfs, we'll try to lock the PCI rescan mutex recursively and deadlock - the platform device will be populated and probed on the same thread that handles the sysfs write. Add a workqueue to the pwrctl code on which we schedule the rescan for controlled PCI devices. While at it: add a new interface for initializing the pwrctl context where we'd now assign the parent device address and initialize the workqueue. Link: https://lore.kernel.org/r/20240823093323.33450-3-brgl@bgdev.pl Fixes: 4565d2652a37 ("PCI/pwrctl: Add PCI power control core code") Reported-by: Konrad Dybcio Signed-off-by: Bartosz Golaszewski Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- include/linux/pci-pwrctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h index 45e9cfe740e4..0d23dddf59ec 100644 --- a/include/linux/pci-pwrctl.h +++ b/include/linux/pci-pwrctl.h @@ -7,6 +7,7 @@ #define __PCI_PWRCTL_H__ #include +#include struct device; struct device_link; @@ -41,8 +42,10 @@ struct pci_pwrctl { /* Private: don't use. 
*/ struct notifier_block nb; struct device_link *link; + struct work_struct work; }; +void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev); int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); int devm_pci_pwrctl_device_set_ready(struct device *dev, -- cgit v1.2.3 From de6c85bf918ea52d5c680f0d130b37ee2ff152d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Aug 2024 09:02:47 +0300 Subject: dma-mapping: clearly mark DMA ops as an architecture feature DMA ops are a helper for architectures and not for drivers to override the DMA implementation. Unfortunately driver authors keep ignoring this. Make the fact more clear by renaming the symbol to ARCH_HAS_DMA_OPS and having the two drivers overriding their dma_ops depend on that. These drivers should probably be marked broken, but we can give them a bit of a grace period for that. Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Acked-by: Sakari Ailus # for IPU6 Acked-by: Robin Murphy --- include/linux/device.h | 2 +- include/linux/dma-map-ops.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 1c5280d28bc3..b4bde8d22697 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -750,7 +750,7 @@ struct device { struct dev_pin_info *pins; #endif struct dev_msi_info msi; -#ifdef CONFIG_DMA_OPS +#ifdef CONFIG_ARCH_HAS_DMA_OPS const struct dma_map_ops *dma_ops; #endif u64 *dma_mask; /* dma mask (if dma'able device) */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 077b15c93bb8..9668ddf3696e 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -75,7 +75,7 @@ struct dma_map_ops { unsigned long (*get_merge_boundary)(struct device *dev); }; -#ifdef CONFIG_DMA_OPS +#ifdef CONFIG_ARCH_HAS_DMA_OPS #include static inline const struct dma_map_ops *get_dma_ops(struct device *dev) @@ -90,7 +90,7 @@ static inline void set_dma_ops(struct device *dev, { dev->dma_ops = dma_ops; } -#else /* CONFIG_DMA_OPS */ +#else /* CONFIG_ARCH_HAS_DMA_OPS */ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return NULL; @@ -99,7 +99,7 @@ static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *dma_ops) { } -#endif /* CONFIG_DMA_OPS */ +#endif /* CONFIG_ARCH_HAS_DMA_OPS */ #ifdef CONFIG_DMA_CMA extern struct cma *dma_contiguous_default_area; -- cgit v1.2.3 From 73ed0baae66df50359c876f65f41179d6ebd2716 Mon Sep 17 00:00:00 2001 From: Chris Li Date: Tue, 30 Jul 2024 23:49:13 -0700 Subject: mm: swap: swap cluster switch to double link list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: swap: mTHP swap allocator base on swap cluster order", v5. This is the short term solutions "swap cluster order" listed in my "Swap Abstraction" discussion slice 8 in the recent LSF/MM conference. When commit 845982eb264bc "mm: swap: allow storage of all mTHP orders" is introduced, it only allocates the mTHP swap entries from the new empty cluster list.  It has a fragmentation issue reported by Barry. https://lore.kernel.org/all/CAGsJ_4zAcJkuW016Cfi6wicRr8N9X+GJJhgMQdSMp+Ah+NSgNQ@mail.gmail.com/ The reason is that all the empty clusters have been exhausted while there are plenty of free swap entries in the cluster that are not 100% free. Remember the swap allocation order in the cluster. 
Keep track of the per order non full cluster list for later allocation. This series gives the swap SSD allocation a new separate code path from the HDD allocation. The new allocator use cluster list only and do not global scan swap_map[] without lock any more. This streamline the swap allocation for SSD. The code matches the execution flow much better. User impact: For users that allocate and free mix order mTHP swapping, It greatly improves the success rate of the mTHP swap allocation after the initial phase. It also performs faster when the swapfile is close to full, because the allocator can get the non full cluster from a list rather than scanning a lot of swap_map entries.  With Barry's mthp test program V2: Without: $ ./thp_swap_allocator_test -a Iteration 1: swpout inc: 32, swpout fallback inc: 192, Fallback percentage: 85.71% Iteration 2: swpout inc: 0, swpout fallback inc: 231, Fallback percentage: 100.00% Iteration 3: swpout inc: 0, swpout fallback inc: 227, Fallback percentage: 100.00% ... Iteration 98: swpout inc: 0, swpout fallback inc: 224, Fallback percentage: 100.00% Iteration 99: swpout inc: 0, swpout fallback inc: 215, Fallback percentage: 100.00% Iteration 100: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% $ ./thp_swap_allocator_test -a -s Iteration 1: swpout inc: 0, swpout fallback inc: 224, Fallback percentage: 100.00% Iteration 2: swpout inc: 0, swpout fallback inc: 218, Fallback percentage: 100.00% Iteration 3: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% .. Iteration 98: swpout inc: 0, swpout fallback inc: 228, Fallback percentage: 100.00% Iteration 99: swpout inc: 0, swpout fallback inc: 230, Fallback percentage: 100.00% Iteration 100: swpout inc: 0, swpout fallback inc: 229, Fallback percentage: 100.00% $ ./thp_swap_allocator_test -s Iteration 1: swpout inc: 0, swpout fallback inc: 224, Fallback percentage: 100.00% Iteration 2: swpout inc: 0, swpout fallback inc: 218, Fallback percentage: 100.00% Iteration 3: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% .. Iteration 98: swpout inc: 0, swpout fallback inc: 228, Fallback percentage: 100.00% Iteration 99: swpout inc: 0, swpout fallback inc: 230, Fallback percentage: 100.00% Iteration 100: swpout inc: 0, swpout fallback inc: 229, Fallback percentage: 100.00% $ ./thp_swap_allocator_test Iteration 1: swpout inc: 0, swpout fallback inc: 224, Fallback percentage: 100.00% Iteration 2: swpout inc: 0, swpout fallback inc: 218, Fallback percentage: 100.00% Iteration 3: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% .. 
Iteration 98: swpout inc: 0, swpout fallback inc: 228, Fallback percentage: 100.00% Iteration 99: swpout inc: 0, swpout fallback inc: 230, Fallback percentage: 100.00% Iteration 100: swpout inc: 0, swpout fallback inc: 229, Fallback percentage: 100.00% With: # with all 0.00% filter out $ ./thp_swap_allocator_test -a | grep -v "0.00%" $ # all result are 0.00% $ ./thp_swap_allocator_test -a -s | grep -v "0.00%" ./thp_swap_allocator_test -a -s | grep -v "0.00%" Iteration 14: swpout inc: 223, swpout fallback inc: 3, Fallback percentage: 1.33% Iteration 19: swpout inc: 219, swpout fallback inc: 7, Fallback percentage: 3.10% Iteration 28: swpout inc: 225, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 29: swpout inc: 227, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 34: swpout inc: 220, swpout fallback inc: 8, Fallback percentage: 3.51% Iteration 35: swpout inc: 222, swpout fallback inc: 11, Fallback percentage: 4.72% Iteration 38: swpout inc: 217, swpout fallback inc: 4, Fallback percentage: 1.81% Iteration 40: swpout inc: 222, swpout fallback inc: 6, Fallback percentage: 2.63% Iteration 42: swpout inc: 221, swpout fallback inc: 2, Fallback percentage: 0.90% Iteration 43: swpout inc: 215, swpout fallback inc: 7, Fallback percentage: 3.15% Iteration 47: swpout inc: 226, swpout fallback inc: 2, Fallback percentage: 0.88% Iteration 49: swpout inc: 217, swpout fallback inc: 1, Fallback percentage: 0.46% Iteration 52: swpout inc: 221, swpout fallback inc: 8, Fallback percentage: 3.49% Iteration 56: swpout inc: 224, swpout fallback inc: 4, Fallback percentage: 1.75% Iteration 58: swpout inc: 214, swpout fallback inc: 5, Fallback percentage: 2.28% Iteration 62: swpout inc: 220, swpout fallback inc: 3, Fallback percentage: 1.35% Iteration 64: swpout inc: 224, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 67: swpout inc: 221, swpout fallback inc: 1, Fallback percentage: 0.45% Iteration 75: swpout inc: 220, swpout fallback inc: 9, Fallback percentage: 3.93% Iteration 82: swpout inc: 227, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 86: swpout inc: 211, swpout fallback inc: 12, Fallback percentage: 5.38% Iteration 89: swpout inc: 226, swpout fallback inc: 2, Fallback percentage: 0.88% Iteration 93: swpout inc: 220, swpout fallback inc: 1, Fallback percentage: 0.45% Iteration 94: swpout inc: 224, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 96: swpout inc: 221, swpout fallback inc: 6, Fallback percentage: 2.64% Iteration 98: swpout inc: 227, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 99: swpout inc: 227, swpout fallback inc: 3, Fallback percentage: 1.30% $ ./thp_swap_allocator_test ./thp_swap_allocator_test Iteration 1: swpout inc: 233, swpout fallback inc: 0, Fallback percentage: 0.00% Iteration 2: swpout inc: 131, swpout fallback inc: 101, Fallback percentage: 43.53% Iteration 3: swpout inc: 71, swpout fallback inc: 155, Fallback percentage: 68.58% Iteration 4: swpout inc: 55, swpout fallback inc: 168, Fallback percentage: 75.34% Iteration 5: swpout inc: 35, swpout fallback inc: 191, Fallback percentage: 84.51% Iteration 6: swpout inc: 25, swpout fallback inc: 199, Fallback percentage: 88.84% Iteration 7: swpout inc: 23, swpout fallback inc: 205, Fallback percentage: 89.91% Iteration 8: swpout inc: 9, swpout fallback inc: 219, Fallback percentage: 96.05% Iteration 9: swpout inc: 13, swpout fallback inc: 213, Fallback percentage: 94.25% Iteration 10: swpout inc: 12, swpout fallback inc: 216, Fallback percentage: 
94.74% Iteration 11: swpout inc: 16, swpout fallback inc: 213, Fallback percentage: 93.01% Iteration 12: swpout inc: 10, swpout fallback inc: 210, Fallback percentage: 95.45% Iteration 13: swpout inc: 16, swpout fallback inc: 212, Fallback percentage: 92.98% Iteration 14: swpout inc: 12, swpout fallback inc: 212, Fallback percentage: 94.64% Iteration 15: swpout inc: 15, swpout fallback inc: 211, Fallback percentage: 93.36% Iteration 16: swpout inc: 15, swpout fallback inc: 200, Fallback percentage: 93.02% Iteration 17: swpout inc: 9, swpout fallback inc: 220, Fallback percentage: 96.07% $ ./thp_swap_allocator_test -s ./thp_swap_allocator_test -s Iteration 1: swpout inc: 233, swpout fallback inc: 0, Fallback percentage: 0.00% Iteration 2: swpout inc: 97, swpout fallback inc: 135, Fallback percentage: 58.19% Iteration 3: swpout inc: 42, swpout fallback inc: 192, Fallback percentage: 82.05% Iteration 4: swpout inc: 19, swpout fallback inc: 214, Fallback percentage: 91.85% Iteration 5: swpout inc: 12, swpout fallback inc: 213, Fallback percentage: 94.67% Iteration 6: swpout inc: 11, swpout fallback inc: 217, Fallback percentage: 95.18% Iteration 7: swpout inc: 9, swpout fallback inc: 214, Fallback percentage: 95.96% Iteration 8: swpout inc: 8, swpout fallback inc: 213, Fallback percentage: 96.38% Iteration 9: swpout inc: 2, swpout fallback inc: 223, Fallback percentage: 99.11% Iteration 10: swpout inc: 2, swpout fallback inc: 228, Fallback percentage: 99.13% Iteration 11: swpout inc: 4, swpout fallback inc: 214, Fallback percentage: 98.17% Iteration 12: swpout inc: 5, swpout fallback inc: 226, Fallback percentage: 97.84% Iteration 13: swpout inc: 3, swpout fallback inc: 212, Fallback percentage: 98.60% Iteration 14: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% Iteration 15: swpout inc: 3, swpout fallback inc: 222, Fallback percentage: 98.67% Iteration 16: swpout inc: 4, swpout fallback inc: 223, Fallback percentage: 98.24% ========= Kernel compile under tmpfs with cgroup memory.max = 470M. 12 core 24 hyperthreading, 32 jobs. 10 Run each group SSD swap 10 runs average, 20G swap partition: With: user 2929.064 system 1479.381 : 1376.89 1398.22 1444.64 1477.39 1479.04 1497.27 1504.47 1531.4 1532.92 1551.57 real 1441.324 Without: user 2910.872 system 1482.732 : 1440.01 1451.4 1462.01 1467.47 1467.51 1469.3 1470.19 1496.32 1544.1 1559.01 real 1580.822 Two zram swap: zram0 3.0G zram1 20G. The idea is forcing the zram0 almost full then overflow to zram1: With: user 4320.301 system 4272.403 : 4236.24 4262.81 4264.75 4269.13 4269.44 4273.06 4279.85 4285.98 4289.64 4293.13 real 431.759 Without user 4301.393 system 4387.672 : 4374.47 4378.3 4380.95 4382.84 4383.06 4388.05 4389.76 4397.16 4398.23 4403.9 real 433.979 ------ more test result from Kaiui ---------- Test with build linux kernel using a 4G ZRAM, 1G memory.max limit on top of shmem: System info: 32 Core AMD Zen2, 64G total memory. 
Test 3 times using only 4K pages: ================================= With: ----- 1838.74user 2411.21system 2:37.86elapsed 2692%CPU (0avgtext+0avgdata 847060maxresident)k 1839.86user 2465.77system 2:39.35elapsed 2701%CPU (0avgtext+0avgdata 847060maxresident)k 1840.26user 2454.68system 2:39.43elapsed 2693%CPU (0avgtext+0avgdata 847060maxresident)k Summary (~4.6% improment of system time): User: 1839.62 System: 2443.89: 2465.77 2454.68 2411.21 Real: 158.88 Without: -------- 1837.99user 2575.95system 2:43.09elapsed 2706%CPU (0avgtext+0avgdata 846520maxresident)k 1838.32user 2555.15system 2:42.52elapsed 2709%CPU (0avgtext+0avgdata 846520maxresident)k 1843.02user 2561.55system 2:43.35elapsed 2702%CPU (0avgtext+0avgdata 846520maxresident)k Summary: User: 1839.78 System: 2564.22: 2575.95 2555.15 2561.55 Real: 162.99 Test 5 times using enabled all mTHP pages: ========================================== With: ----- 1796.44user 2937.33system 2:59.09elapsed 2643%CPU (0avgtext+0avgdata 846936maxresident)k 1802.55user 3002.32system 2:54.68elapsed 2750%CPU (0avgtext+0avgdata 847072maxresident)k 1806.59user 2986.53system 2:55.17elapsed 2736%CPU (0avgtext+0avgdata 847092maxresident)k 1803.27user 2982.40system 2:54.49elapsed 2742%CPU (0avgtext+0avgdata 846796maxresident)k 1807.43user 3036.08system 2:56.06elapsed 2751%CPU (0avgtext+0avgdata 846488maxresident)k Summary (~8.4% improvement of system time): User: 1803.25 System: 2988.93: 2937.33 3002.32 2986.53 2982.40 3036.08 Real: 175.90 mTHP swapout status: /sys/kernel/mm/transparent_hugepage/hugepages-32kB/stats/swpout:347721 /sys/kernel/mm/transparent_hugepage/hugepages-32kB/stats/swpout_fallback:3110 /sys/kernel/mm/transparent_hugepage/hugepages-512kB/stats/swpout:3365 /sys/kernel/mm/transparent_hugepage/hugepages-512kB/stats/swpout_fallback:8269 /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/stats/swpout:24 /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/stats/swpout_fallback:3341 /sys/kernel/mm/transparent_hugepage/hugepages-1024kB/stats/swpout:145 /sys/kernel/mm/transparent_hugepage/hugepages-1024kB/stats/swpout_fallback:5038 /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout:322737 /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout_fallback:36808 /sys/kernel/mm/transparent_hugepage/hugepages-16kB/stats/swpout:380455 /sys/kernel/mm/transparent_hugepage/hugepages-16kB/stats/swpout_fallback:1010 /sys/kernel/mm/transparent_hugepage/hugepages-256kB/stats/swpout:24973 /sys/kernel/mm/transparent_hugepage/hugepages-256kB/stats/swpout_fallback:13223 /sys/kernel/mm/transparent_hugepage/hugepages-128kB/stats/swpout:197348 /sys/kernel/mm/transparent_hugepage/hugepages-128kB/stats/swpout_fallback:80541 Without: -------- 1794.41user 3151.29system 3:05.97elapsed 2659%CPU (0avgtext+0avgdata 846704maxresident)k 1810.27user 3304.48system 3:05.38elapsed 2759%CPU (0avgtext+0avgdata 846636maxresident)k 1809.84user 3254.85system 3:03.83elapsed 2755%CPU (0avgtext+0avgdata 846952maxresident)k 1813.54user 3259.56system 3:04.28elapsed 2752%CPU (0avgtext+0avgdata 846848maxresident)k 1829.97user 3338.40system 3:07.32elapsed 2759%CPU (0avgtext+0avgdata 847024maxresident)k Summary: User: 1811.61 System: 3261.72 : 3151.29 3304.48 3254.85 3259.56 3338.40 Real: 185.356 mTHP swapout status: hugepages-32kB/stats/swpout:35630 hugepages-32kB/stats/swpout_fallback:1809908 hugepages-512kB/stats/swpout:523 hugepages-512kB/stats/swpout_fallback:55235 hugepages-2048kB/stats/swpout:53 hugepages-2048kB/stats/swpout_fallback:17264 
hugepages-1024kB/stats/swpout:85 hugepages-1024kB/stats/swpout_fallback:24979 hugepages-64kB/stats/swpout:30117 hugepages-64kB/stats/swpout_fallback:1825399 hugepages-16kB/stats/swpout:42775 hugepages-16kB/stats/swpout_fallback:1951123 hugepages-256kB/stats/swpout:2326 hugepages-256kB/stats/swpout_fallback:170165 hugepages-128kB/stats/swpout:17925 hugepages-128kB/stats/swpout_fallback:1309757

This patch (of 9): Previously, the swap cluster used a cluster index as a pointer to construct a custom singly linked list type "swap_cluster_list". The next cluster pointer is shared with the cluster->count, which prevents putting a non-free cluster into a list. Change the cluster to use the standard doubly linked list instead. This allows tracking the nonfull clusters in a follow-up patch, making it faster to get to a nonfull cluster of a given order. Remove the cluster getter/setter helpers for accessing the cluster struct members. The list operations are protected by swap_info_struct->lock.

Change the cluster code to use "struct swap_cluster_info *" to reference a cluster rather than an index. That is more consistent with the list manipulation and avoids repeatedly adding the index to the cluster_info. The code is easier to understand. Remove the flag that marks the cluster's next pointer as NULL; the doubly linked list handles the empty list just fine.

The "swap_cluster_info" struct grows by two pointers, but because 512 swap entries share one swap_cluster_info struct, this has very little impact on the average memory usage per swap entry. For a 1TB swapfile, the swap cluster data structure increases from 8MB to 24MB. Other than the list conversion, there is no real functional change in this patch.

Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-0-cb9c148b9297@kernel.org Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-1-cb9c148b9297@kernel.org Signed-off-by: Chris Li Reported-by: Barry Song <21cnbao@gmail.com> Reviewed-by: "Huang, Ying" Cc: Hugh Dickins Cc: Kairui Song Cc: Kalesh Singh Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/swap.h | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 5b920fa2315b..ead568f04f81 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -243,22 +243,20 @@ enum { * free clusters are organized into a list. We fetch an entry from the list to * get a free cluster. * - * The data field stores next cluster if the cluster is free or cluster usage - * counter otherwise. The flags field determines if a cluster is free. This is - * protected by swap_info_struct.lock. + * The flags field determines if a cluster is free. This is + * protected by cluster lock. */ struct swap_cluster_info { spinlock_t lock; /* * Protect swap_cluster_info fields - * and swap_info_struct->swap_map - * elements correspond to the swap - * cluster + * other than list, and swap_info_struct->swap_map + * elements corresponding to the swap cluster.
*/ - unsigned int data:24; - unsigned int flags:8; + u16 count; + u8 flags; + struct list_head list; }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ -#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ /* * The first page in the swap file is the swap header, which is always marked @@ -283,11 +281,6 @@ struct percpu_cluster { unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */ }; -struct swap_cluster_list { - struct swap_cluster_info head; - struct swap_cluster_info tail; -}; - /* * The in-memory structure used to track swap areas. */ @@ -300,7 +293,7 @@ struct swap_info_struct { unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ - struct swap_cluster_list free_clusters; /* free clusters list */ + struct list_head free_clusters; /* free clusters list */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ @@ -331,7 +324,7 @@ struct swap_info_struct { * list. */ struct work_struct discard_work; /* discard worker */ - struct swap_cluster_list discard_clusters; /* discard clusters list */ + struct list_head discard_clusters; /* discard clusters list */ struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. -- cgit v1.2.3

From d07a46a4ac18786e7f4c98fb08525ed80dd1f642 Mon Sep 17 00:00:00 2001 From: Chris Li Date: Tue, 30 Jul 2024 23:49:14 -0700 Subject: mm: swap: mTHP allocate swap entries from nonfull list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

Track the nonfull clusters as well as the empty clusters on lists. Each order has one nonfull cluster list. A cluster remembers which order it was used for when it was allocated as a new cluster. When a cluster has a free entry, it is added to the nonfull[order] list. When the free cluster list is empty, also allocate from the nonfull list of that order. This improves the mTHP swap allocation success rate.

There are limitations if the distribution of mTHP orders changes a lot over time, e.g. a lot of nonfull clusters get assigned to order A, and later there are many order B allocations but very little allocation in order A. Currently the clusters used by order A will not be reused by order B unless they become 100% empty.

Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-2-cb9c148b9297@kernel.org Signed-off-by: Chris Li Reported-by: Barry Song <21cnbao@gmail.com> Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kairui Song Cc: Kalesh Singh Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/swap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index ead568f04f81..7007a3c2336c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -254,9 +254,11 @@ struct swap_cluster_info { */ u16 count; u8 flags; + u8 order; struct list_head list; }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ +#define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */ /* * The first page in the swap file is the swap header, which is always marked @@ -294,6 +296,8 @@ struct swap_info_struct { unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info.
Only for SSD */ struct list_head free_clusters; /* free clusters list */ + struct list_head nonfull_clusters[SWAP_NR_ORDERS]; + /* list of cluster that contains at least one free slot */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ -- cgit v1.2.3

From 477cb7ba28892eda112c79d8f75d10edabfc3050 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 30 Jul 2024 23:49:19 -0700 Subject: mm: swap: add a fragment cluster list

Now that the swap cluster allocator arranges the clusters in LRU style, the "cold" clusters staying at the head of the nonfull lists are the ones that were used for allocation a long time ago and are still partially occupied. So if the allocator can't find enough contiguous slots on them to satisfy a high-order allocation, it's unlikely that slots will be freed on them to satisfy the allocation, at least not within a short period. As a result, nonfull cluster scanning will waste time repeatedly scanning the unusable head of the list.

Also, multiple CPUs could contend on the same head cluster of a nonfull list. Unlike free clusters, which are removed from the list when a CPU starts using them, a nonfull cluster stays at the head.

So introduce a new list, the frag list: all scanned nonfull clusters will be moved to this list, both to avoid repeated scanning and to avoid contention. The frag list is still used as a fallback for allocations, so if one CPU fails to allocate slots of one order, it can still steal other CPUs' clusters. And order 0 will favor the fragmented clusters to better protect the nonfull clusters.

If any slots on a fragment cluster are freed, move the cluster back to the nonfull list, indicating that it is worth another scan. Compared to scanning upon freeing a slot, this keeps the scanning lazy and saves some CPU if there are still other clusters to use.

It may seem unnecessary to keep the fragmented clusters on a list at all if they can't be used for an allocation of a specific order, but this will start to make sense once reclaim during scanning is ready.
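As a purely illustrative aside (not part of the patch; the helper names and the assumption that si->lock is held are mine), the list movement described above boils down to something like:

        /* Sketch: move a scanned nonfull cluster to the per-order frag list
         * so subsequent scans and other CPUs skip it.
         */
        static void cluster_move_to_frag(struct swap_info_struct *si,
                                         struct swap_cluster_info *ci)
        {
                lockdep_assert_held(&si->lock);
                ci->flags &= ~CLUSTER_FLAG_NONFULL;
                ci->flags |= CLUSTER_FLAG_FRAG;
                list_move_tail(&ci->list, &si->frag_clusters[ci->order]);
        }

        /* Sketch: a slot on a fragment cluster was freed, so give the cluster
         * another chance on the nonfull list.
         */
        static void cluster_move_to_nonfull(struct swap_info_struct *si,
                                            struct swap_cluster_info *ci)
        {
                lockdep_assert_held(&si->lock);
                ci->flags &= ~CLUSTER_FLAG_FRAG;
                ci->flags |= CLUSTER_FLAG_NONFULL;
                list_move_tail(&ci->list, &si->nonfull_clusters[ci->order]);
        }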
Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-7-cb9c148b9297@kernel.org Signed-off-by: Kairui Song Reported-by: Barry Song <21cnbao@gmail.com> Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kalesh Singh Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/swap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7007a3c2336c..c526cd3e7b31 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -259,6 +259,7 @@ struct swap_cluster_info { }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */ +#define CLUSTER_FLAG_FRAG 4 /* This cluster is on nonfull list */ /* * The first page in the swap file is the swap header, which is always marked @@ -298,6 +299,8 @@ struct swap_info_struct { struct list_head free_clusters; /* free clusters list */ struct list_head nonfull_clusters[SWAP_NR_ORDERS]; /* list of cluster that contains at least one free slot */ + struct list_head frag_clusters[SWAP_NR_ORDERS]; + /* list of cluster that are fragmented or contented */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ -- cgit v1.2.3

From 661383c6111a38c88df61af6bfbcfacd2ff20a67 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 30 Jul 2024 23:49:20 -0700 Subject: mm: swap: relaim the cached parts that got scanned

This commit implements reclaim during scan for the cluster allocator. Cluster scanning was unable to reuse SWAP_HAS_CACHE slots, which could result in a low allocation success rate or early OOM. So to ensure the maximum allocation success rate, integrate reclaiming with scanning: if a suitable range of swap slots is found but it is fragmented due to HAS_CACHE, just try to reclaim those slots.

Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-8-cb9c148b9297@kernel.org Signed-off-by: Kairui Song Reported-by: Barry Song <21cnbao@gmail.com> Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kalesh Singh Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index c526cd3e7b31..e372122e4f45 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -301,6 +301,7 @@ struct swap_info_struct { /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; /* list of cluster that are fragmented or contented */ + unsigned int frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ -- cgit v1.2.3

From 2cacbdfdee65b18f9952620e762eab043d71b564 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 30 Jul 2024 23:49:21 -0700 Subject: mm: swap: add a adaptive full cluster cache reclaim

Link all full clusters with one full list, and reclaim from it when the allocator has run out of all usable clusters. There are many reasons a folio can end up in the swap cache while having no swap count reference, so the best way to search for such slots is still by iterating over the swap clusters. With the list as an LRU, iterating from the oldest cluster and keeping the clusters rotating is a very doable and clean way to free up potentially unused clusters.
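(Purely as a sketch of that rotation idea, with a hypothetical helper name and the actual reclaim of cached-only slots elided:)

        /* Sketch: rotate the oldest full cluster to the list tail and report
         * whether there was a candidate to try reclaiming from.
         */
        static bool rotate_one_full_cluster(struct swap_info_struct *si)
        {
                struct swap_cluster_info *ci;

                lockdep_assert_held(&si->lock);
                if (list_empty(&si->full_clusters))
                        return false;

                ci = list_first_entry(&si->full_clusters,
                                      struct swap_cluster_info, list);
                list_move_tail(&ci->list, &si->full_clusters);
                /* reclaim of SWAP_HAS_CACHE-only slots on ci would go here */
                return true;
        }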
When any allocation fails, try to reclaim and rotate only one cluster. This is adaptive for high-order allocations, since they can tolerate fallback. It avoids latency and gives the full cluster list a fair chance to get reclaimed, relieving the pressure for the fallback order-0 allocations or follow-up high-order allocations. If the swap device is getting very full, reclaim more aggressively to ensure no OOM will happen. This ensures an order-0 heavy workload won't go OOM, as order-0 allocations won't fail if any cluster still has space.

[ryncsn@gmail.com: fix discard of full cluster] Link: https://lkml.kernel.org/r/CAMgjq7CWwK75_2Zi5P40K08pk9iqOcuWKL6khu=x4Yg_nXaQag@mail.gmail.com Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-9-cb9c148b9297@kernel.org Signed-off-by: Kairui Song Reported-by: Barry Song <21cnbao@gmail.com> Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kalesh Singh Cc: Ryan Roberts Cc: David Hildenbrand Cc: Kairui Song Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index e372122e4f45..1c8f844a9f0f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -260,6 +260,7 @@ struct swap_cluster_info { #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */ #define CLUSTER_FLAG_FRAG 4 /* This cluster is on nonfull list */ +#define CLUSTER_FLAG_FULL 8 /* This cluster is on full list */ /* * The first page in the swap file is the swap header, which is always marked @@ -297,6 +298,7 @@ struct swap_info_struct { unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct list_head free_clusters; /* free clusters list */ + struct list_head full_clusters; /* full clusters list */ struct list_head nonfull_clusters[SWAP_NR_ORDERS]; /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; -- cgit v1.2.3

From 46bcce503197d1019ee5c49ccde978e31298e35f Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:40:51 +0300 Subject: arch, mm: move definition of node_data to generic code

Every architecture that supports NUMA defines node_data in the same way: struct pglist_data *node_data[MAX_NUMNODES]; There is no reason to keep multiple copies of this definition and its forward declarations, especially when such a forward declaration is the only thing in include/asm/mmzone.h for many architectures. Add the definition and declaration of node_data to generic code and drop the architecture-specific versions.

Link: https://lkml.kernel.org/r/20240807064110.1003856-8-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Acked-by: Davidlohr Bueso Tested-by: Zi Yan # for x86_64 and arm64 Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J.
Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/asm-generic/mmzone.h | 5 +++++ include/linux/numa.h | 3 +++ 2 files changed, 8 insertions(+) create mode 100644 include/asm-generic/mmzone.h (limited to 'include') diff --git a/include/asm-generic/mmzone.h b/include/asm-generic/mmzone.h new file mode 100644 index 000000000000..2ab5193e8394 --- /dev/null +++ b/include/asm-generic/mmzone.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_MMZONE_H +#define _ASM_GENERIC_MMZONE_H + +#endif diff --git a/include/linux/numa.h b/include/linux/numa.h index eb19503604fe..e5841d4057ab 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -30,6 +30,9 @@ static inline bool numa_valid_node(int nid) #ifdef CONFIG_NUMA #include +extern struct pglist_data *node_data[]; +#define NODE_DATA(nid) (node_data[nid]) + /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); -- cgit v1.2.3 From ec164cf1dd3df4ae58931b9b491b06a90d5c22d3 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:40:52 +0300 Subject: mm: drop CONFIG_HAVE_ARCH_NODEDATA_EXTENSION There are no users of HAVE_ARCH_NODEDATA_EXTENSION left, so arch_alloc_nodedata() and arch_refresh_nodedata() are not needed anymore. Replace the call to arch_alloc_nodedata() in free_area_init() with a new helper alloc_offline_node_data(), remove arch_refresh_nodedata() and cleanup include/linux/memory_hotplug.h from the associated ifdefery. Link: https://lkml.kernel.org/r/20240807064110.1003856-9-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Acked-by: Dan Williams Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/memory_hotplug.h | 48 ------------------------------------------ include/linux/numa.h | 4 ++++ 2 files changed, 4 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ebe876930e78..b27ddce5d324 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -16,54 +16,6 @@ struct resource; struct vmem_altmap; struct dev_pagemap; -#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION -/* - * For supporting node-hotadd, we have to allocate a new pgdat. - * - * If an arch has generic style NODE_DATA(), - * node_data[nid] = kzalloc() works well. But it depends on the architecture. - * - * In general, generic_alloc_nodedata() is used. - * - */ -extern pg_data_t *arch_alloc_nodedata(int nid); -extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); - -#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - -#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid) - -#ifdef CONFIG_NUMA -/* - * XXX: node aware allocation can't work well to get new node's memory at this time. - * Because, pgdat for the new node is not allocated/initialized yet itself. 
- * To use new node's memory, more consideration will be necessary. - */ -#define generic_alloc_nodedata(nid) \ -({ \ - memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \ -}) - -extern pg_data_t *node_data[]; -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ - node_data[nid] = pgdat; -} - -#else /* !CONFIG_NUMA */ - -/* never called */ -static inline pg_data_t *generic_alloc_nodedata(int nid) -{ - BUG(); - return NULL; -} -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ -} -#endif /* CONFIG_NUMA */ -#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - #ifdef CONFIG_MEMORY_HOTPLUG struct page *pfn_to_online_page(unsigned long pfn); diff --git a/include/linux/numa.h b/include/linux/numa.h index e5841d4057ab..b41b1569781b 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -33,6 +33,8 @@ static inline bool numa_valid_node(int nid) extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) +void __init alloc_offline_node_data(int nid); + /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); @@ -60,6 +62,8 @@ static inline int phys_to_target_node(u64 start) { return 0; } + +static inline void alloc_offline_node_data(int nid) {} #endif #define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE) -- cgit v1.2.3 From 3515863d9f29dd476cdad875fbd558fc85b4c511 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:40:53 +0300 Subject: arch, mm: pull out allocation of NODE_DATA to generic code Architectures that support NUMA duplicate the code that allocates NODE_DATA on the node-local memory with slight variations in reporting of the addresses where the memory was allocated. Use x86 version as the basis for the generic alloc_node_data() function and call this function in architecture specific numa initialization. Round up node data size to SMP_CACHE_BYTES rather than to PAGE_SIZE like x86 used to do since the bootmem era when allocation granularity was PAGE_SIZE anyway. Link: https://lkml.kernel.org/r/20240807064110.1003856-10-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Tested-by: Zi Yan # for x86_64 and arm64 Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. 
Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/numa.h b/include/linux/numa.h index b41b1569781b..3567e40329eb 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -33,6 +33,7 @@ static inline bool numa_valid_node(int nid) extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) +void __init alloc_node_data(int nid); void __init alloc_offline_node_data(int nid); /* Generic implementation available */ -- cgit v1.2.3 From 87482708210ff3333adab4dc5f1a491793159d43 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:01 +0300 Subject: mm: introduce numa_memblks Move code dealing with numa_memblks from arch/x86 to mm/ and add Kconfig options to let x86 select it in its Kconfig. This code will be later reused by arch_numa. No functional changes. Link: https://lkml.kernel.org/r/20240807064110.1003856-18-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/linux/numa_memblks.h (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h new file mode 100644 index 000000000000..6981cf97d2c9 --- /dev/null +++ b/include/linux/numa_memblks.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NUMA_MEMBLKS_H +#define __NUMA_MEMBLKS_H + +#ifdef CONFIG_NUMA_MEMBLKS +#include + +#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) + +struct numa_memblk { + u64 start; + u64 end; + int nid; +}; + +struct numa_meminfo { + int nr_blks; + struct numa_memblk blk[NR_NODE_MEMBLKS]; +}; + +extern struct numa_meminfo numa_meminfo __initdata_or_meminfo; +extern struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo; + +int __init numa_add_memblk(int nodeid, u64 start, u64 end); +void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); + +int __init numa_cleanup_meminfo(struct numa_meminfo *mi); +int __init numa_register_meminfo(struct numa_meminfo *mi); + +void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, + const struct numa_meminfo *mi); + +#endif /* CONFIG_NUMA_MEMBLKS */ + +#endif /* __NUMA_MEMBLKS_H */ -- cgit v1.2.3 From 75f9d4cc4eb5e15dd5ce843e33de7f6346a0d4fa Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:02 +0300 Subject: mm: move numa_distance and related code from x86 to numa_memblks Move code dealing with numa_distance array from arch/x86 to mm/numa_memblks.c This code will be later reused by arch_numa. No functional changes. 
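For background only, a node-distance lookup over such a table is conceptually just an array access with generic defaults for unknown nodes; a rough sketch (the numa_distance array is assumed here, and this is not the verbatim moved code):

        /* Sketch: distance between two nodes, falling back to the generic
         * LOCAL_DISTANCE/REMOTE_DISTANCE defaults when the table does not
         * cover these nodes.
         */
        int __node_distance(int from, int to)
        {
                if (from >= numa_distance_cnt || to >= numa_distance_cnt)
                        return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
                return numa_distance[from * numa_distance_cnt + to];
        }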
Link: https://lkml.kernel.org/r/20240807064110.1003856-19-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index 6981cf97d2c9..968a590535ac 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -7,6 +7,10 @@ #define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) +extern int numa_distance_cnt; +void __init numa_set_distance(int from, int to, int distance); +void __init numa_reset_distance(void); + struct numa_memblk { u64 start; u64 end; -- cgit v1.2.3 From b0c4e27c687177aa36048110a12cba94bf291452 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:03 +0300 Subject: mm: introduce numa_emulation Move numa_emulation code from arch/x86 to mm/numa_emulation.c This code will be later reused by arch_numa. No functional changes. Link: https://lkml.kernel.org/r/20240807064110.1003856-20-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. 
Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index 968a590535ac..f81f98678074 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -34,6 +34,23 @@ int __init numa_register_meminfo(struct numa_meminfo *mi); void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, const struct numa_meminfo *mi); +#ifdef CONFIG_NUMA_EMU +int numa_emu_cmdline(char *str); +void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, + unsigned int nr_emu_nids); +u64 __init numa_emu_dma_end(void); +void __init numa_emulation(struct numa_meminfo *numa_meminfo, + int numa_dist_cnt); +#else +static inline void numa_emulation(struct numa_meminfo *numa_meminfo, + int numa_dist_cnt) +{ } +static inline int numa_emu_cmdline(char *str) +{ + return -EINVAL; +} +#endif /* CONFIG_NUMA_EMU */ + #endif /* CONFIG_NUMA_MEMBLKS */ #endif /* __NUMA_MEMBLKS_H */ -- cgit v1.2.3 From 692d73d2f0f75e58828ab05872b3789ae1f486ef Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:04 +0300 Subject: mm: numa_memblks: introduce numa_memblks_init Move most of x86::numa_init() to numa_memblks so that the latter will be more self-contained. With this numa_memblk data structures should not be exposed to the architecture specific code. Link: https://lkml.kernel.org/r/20240807064110.1003856-21-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index f81f98678074..07381320848f 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -34,6 +34,9 @@ int __init numa_register_meminfo(struct numa_meminfo *mi); void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, const struct numa_meminfo *mi); +int __init numa_memblks_init(int (*init_func)(void), + bool memblock_force_top_down); + #ifdef CONFIG_NUMA_EMU int numa_emu_cmdline(char *str); void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, -- cgit v1.2.3 From 317ef4598bdc48c0196adc02ed4edaf82af279f2 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:05 +0300 Subject: mm: numa_memblks: make several functions and variables static Make functions and variables that are exclusively used by numa_memblks static. Move numa_nodemask_from_meminfo() before its callers to avoid forward declaration. 
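For context, what that helper conceptually does, shown as a simplified sketch rather than the verbatim kernel code:

        /* Sketch: collect every node that owns at least one memblk. */
        static void numa_nodemask_from_meminfo(nodemask_t *nodemask,
                                               const struct numa_meminfo *mi)
        {
                int i;

                for (i = 0; i < mi->nr_blks; i++)
                        if (numa_valid_node(mi->blk[i].nid))
                                node_set(mi->blk[i].nid, *nodemask);
        }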
Link: https://lkml.kernel.org/r/20240807064110.1003856-22-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index 07381320848f..5c6e12ad0b7a 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -7,7 +7,6 @@ #define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) -extern int numa_distance_cnt; void __init numa_set_distance(int from, int to, int distance); void __init numa_reset_distance(void); @@ -22,17 +21,10 @@ struct numa_meminfo { struct numa_memblk blk[NR_NODE_MEMBLKS]; }; -extern struct numa_meminfo numa_meminfo __initdata_or_meminfo; -extern struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo; - int __init numa_add_memblk(int nodeid, u64 start, u64 end); void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); int __init numa_cleanup_meminfo(struct numa_meminfo *mi); -int __init numa_register_meminfo(struct numa_meminfo *mi); - -void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, - const struct numa_meminfo *mi); int __init numa_memblks_init(int (*init_func)(void), bool memblock_force_top_down); -- cgit v1.2.3 From 767507654c22578ea0b51d181211b2e7714ea7cd Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:08 +0300 Subject: arch_numa: switch over to numa_memblks Until now arch_numa was directly translating firmware NUMA information to memblock. Using numa_memblks as an intermediate step has a few advantages: * alignment with more battle tested x86 implementation * availability of NUMA emulation * maintaining node information for not yet populated memory Adjust a few places in numa_memblks to compile with 32-bit phys_addr_t and replace current functionality related to numa_add_memblk() and __node_distance() in arch_numa with the implementation based on numa_memblks and add functions required by numa_emulation. [rppt@kernel.org: fix section mismatch] Link: https://lkml.kernel.org/r/ZrO6cExVz1He_yPn@kernel.org [rppt@kernel.org: PFN_PHYS() translation is unnecessary here] Link: https://lkml.kernel.org/r/Zs2T5wkSYO9MGcab@kernel.org Link: https://lkml.kernel.org/r/20240807064110.1003856-25-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. 
Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/asm-generic/numa.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/numa.h b/include/asm-generic/numa.h index c32e0cf23c90..e063d6487f66 100644 --- a/include/asm-generic/numa.h +++ b/include/asm-generic/numa.h @@ -32,10 +32,8 @@ static inline const struct cpumask *cpumask_of_node(int node) void __init arch_numa_init(void); int __init numa_add_memblk(int nodeid, u64 start, u64 end); -void __init numa_set_distance(int from, int to, int distance); -void __init numa_free_distance(void); void __init early_map_cpu_to_node(unsigned int cpu, int nid); -int __init early_cpu_to_node(int cpu); +int early_cpu_to_node(int cpu); void numa_store_cpu_info(unsigned int cpu); void numa_add_cpu(unsigned int cpu); void numa_remove_cpu(unsigned int cpu); @@ -51,4 +49,8 @@ static inline int early_cpu_to_node(int cpu) { return 0; } #endif /* CONFIG_NUMA */ +#ifdef CONFIG_NUMA_EMU +void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable); +#endif + #endif /* __ASM_GENERIC_NUMA_H */ -- cgit v1.2.3 From 1b5695b02444660297cc54cab44f123ff28de2cc Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:09 +0300 Subject: mm: make range-to-target_node lookup facility a part of numa_memblks The x86 implementation of range-to-target_node lookup (i.e. phys_to_target_node() and memory_add_physaddr_to_nid()) relies on numa_memblks. Since numa_memblks are now part of the generic code, move these functions from x86 to mm/numa_memblks.c and select CONFIG_NUMA_KEEP_MEMINFO when CONFIG_NUMA_MEMBLKS=y for dax and cxl. [rppt@kernel.org: fix build] Link: https://lkml.kernel.org/r/ZtVfSt_zloPdDqVB@kernel.org Link: https://lkml.kernel.org/r/20240807064110.1003856-26-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Jonathan Cameron Tested-by: Zi Yan # for x86_64 and arm64 Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Reviewed-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. 
Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index 5c6e12ad0b7a..cfad6ce7e1bd 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -46,6 +46,13 @@ static inline int numa_emu_cmdline(char *str) } #endif /* CONFIG_NUMA_EMU */ +#ifdef CONFIG_NUMA_KEEP_MEMINFO +extern int phys_to_target_node(u64 start); +#define phys_to_target_node phys_to_target_node +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif /* CONFIG_NUMA_KEEP_MEMINFO */ + #endif /* CONFIG_NUMA_MEMBLKS */ #endif /* __NUMA_MEMBLKS_H */ -- cgit v1.2.3

From 650180760be6bb448609d4d155eef3b728ace641 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 12 Aug 2024 15:42:02 +0800 Subject: mm: swap: extend swap_shmem_alloc() to support batch SWAP_MAP_SHMEM flag setting

Patch series "support large folio swap-out and swap-in for shmem", v5.

Shmem will support large folio allocation [1] [2] to get better performance; however, memory reclaim still splits the precious large folios when trying to swap out shmem, which may lead to memory fragmentation and means shmem cannot take advantage of large folios. Moreover, the swap code already supports swapping out large folios without splitting them, and the large folio swap-in [3] series is queued into the mm-unstable branch. Hence this patch set also supports large folio swap-out and swap-in for shmem.

This patch (of 9): To support shmem large folio swap operations, add a new parameter to swap_shmem_alloc() that allows batch SWAP_MAP_SHMEM flag setting for shmem swap entries. While we are at it, use folio_nr_pages() to get the number of pages of the folio as a preparation.
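As an illustrative sketch of the caller-side effect (not the actual shmem hunk; the local variables are assumptions):

        /* Sketch: set SWAP_MAP_SHMEM for the whole folio in one batched call
         * instead of once per page.
         */
        swp_entry_t entry = folio->swap;
        int nr_pages = folio_nr_pages(folio);

        swap_shmem_alloc(entry, nr_pages);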
Link: https://lkml.kernel.org/r/cover.1723434324.git.baolin.wang@linux.alibaba.com Link: https://lkml.kernel.org/r/99f64115d04b285e009580eb177352c57119ffd0.1723434324.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Barry Song Cc: Chris Li Cc: Daniel Gomez Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kefeng Wang Cc: Lance Yang Cc: Matthew Wilcox Cc: Pankaj Raghav Cc: Ryan Roberts Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 1c8f844a9f0f..248db1dd7812 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -481,7 +481,7 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); -extern void swap_shmem_alloc(swp_entry_t); +extern void swap_shmem_alloc(swp_entry_t, int); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t entry, int nr); extern void swap_free_nr(swp_entry_t entry, int nr_pages); @@ -548,7 +548,7 @@ static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) return 0; } -static inline void swap_shmem_alloc(swp_entry_t swp) +static inline void swap_shmem_alloc(swp_entry_t swp, int nr) { } -- cgit v1.2.3

From 809bc86517cc408b5b8cb8e08e69096639432bc8 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 12 Aug 2024 15:42:10 +0800 Subject: mm: shmem: support large folio swap out

Shmem will support large folio allocation [1] [2] to get better performance; however, memory reclaim still splits the precious large folios when trying to swap out shmem, which may lead to memory fragmentation and means shmem cannot take advantage of large folios. Moreover, the swap code already supports swapping out large folios without splitting them, hence this patch set supports large folio swap-out for shmem.

Note that the i915_gem_shmem driver still needs the folio to be split when swapping, thus add a new flag 'split_large_folio' for writeback_control to indicate splitting the large folio.
[1] https://lore.kernel.org/all/cover.1717495894.git.baolin.wang@linux.alibaba.com/ [2] https://lore.kernel.org/all/20240515055719.32577-1-da.gomez@samsung.com/ [hughd@google.com: shmem_writepage() split folio at EOF before swapout] Link: https://lkml.kernel.org/r/aef55f8d-6040-692d-65e3-16150cce4440@google.com [baolin.wang@linux.alibaba.com: remove the wbc->split_large_folio per Hugh] Link: https://lkml.kernel.org/r/1236a002daa301b3b9ba73d6c0fab348427cf295.1724833399.git.baolin.wang@linux.alibaba.com Link: https://lkml.kernel.org/r/d80c21abd20e1b0f5ca66b330f074060fb2f082d.1723434324.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Signed-off-by: Hugh Dickins Cc: Barry Song Cc: Chris Li Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Kefeng Wang Cc: Lance Yang Cc: Matthew Wilcox Cc: Pankaj Raghav Cc: Ryan Roberts Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/writeback.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 1a54676d843a..51278327b3c6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,6 +79,9 @@ struct writeback_control { */ struct swap_iocb **swap_plug; + /* Target list for splitting a large folio */ + struct list_head *list; + /* internal fields used by the ->writepages implementation: */ struct folio_batch fbatch; pgoff_t index; -- cgit v1.2.3 From f77f0c7514789577125c1b2df145703161736359 Mon Sep 17 00:00:00 2001 From: Kaiyang Zhao Date: Wed, 14 Aug 2024 17:42:27 +0000 Subject: mm,memcg: provide per-cgroup counters for NUMA balancing operations The ability to observe the demotion and promotion decisions made by the kernel on a per-cgroup basis is important for monitoring and tuning containerized workloads on machines equipped with tiered memory. Different containers in the system may experience drastically different memory tiering actions that cannot be distinguished from the global counters alone. For example, a container running a workload that has a much hotter memory accesses will likely see more promotions and fewer demotions, potentially depriving a colocated container of top tier memory to such an extent that its performance degrades unacceptably. For another example, some containers may exhibit longer periods between data reuse, causing much more numa_hint_faults than numa_pages_migrated. In this case, tuning hot_threshold_ms may be appropriate, but the signal can easily be lost if only global counters are available. In the long term, we hope to introduce per-cgroup control of promotion and demotion actions to implement memory placement policies in tiering. This patch set adds seven counters to memory.stat in a cgroup: numa_pages_migrated, numa_pte_updates, numa_hint_faults, pgdemote_kswapd, pgdemote_khugepaged, pgdemote_direct and pgpromote_success. pgdemote_* and pgpromote_success are also available in memory.numa_stat. count_memcg_events_mm() is added to count multiple event occurrences at once, and get_mem_cgroup_from_folio() is added because we need to get a reference to the memcg of a folio before it's migrated to track numa_pages_migrated. The accounting of PGDEMOTE_* is moved to shrink_inactive_list() before being changed to per-cgroup. 
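Illustrative use of the batched helper mentioned above (a sketch only; the exact call sites live in the NUMA balancing paths, and the variables here are assumptions):

        /* Sketch: charge a whole batch of NUMA hint faults to the mm's memcg
         * in one call instead of looping over count_memcg_event_mm().
         */
        count_memcg_events_mm(vma->vm_mm, NUMA_HINT_FAULTS, nr_pages);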
[kaiyang2@cs.cmu.edu: add documentation of the memcg counters in cgroup-v2.rst] Link: https://lkml.kernel.org/r/20240814235122.252309-1-kaiyang2@cs.cmu.edu Link: https://lkml.kernel.org/r/20240814174227.30639-1-kaiyang2@cs.cmu.edu Signed-off-by: Kaiyang Zhao Cc: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Wei Xu Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 24 +++++++++++++++++++++--- include/linux/vmstat.h | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ed170399179a..fe05fdb92779 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -784,6 +784,8 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); struct mem_cgroup *get_mem_cgroup_from_current(void); +struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio); + struct lruvec *folio_lruvec_lock(struct folio *folio); struct lruvec *folio_lruvec_lock_irq(struct folio *folio); struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, @@ -1028,8 +1030,8 @@ static inline void count_memcg_folio_events(struct folio *folio, count_memcg_events(memcg, idx, nr); } -static inline void count_memcg_event_mm(struct mm_struct *mm, - enum vm_event_item idx) +static inline void count_memcg_events_mm(struct mm_struct *mm, + enum vm_event_item idx, unsigned long count) { struct mem_cgroup *memcg; @@ -1039,10 +1041,16 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (likely(memcg)) - count_memcg_events(memcg, idx, 1); + count_memcg_events(memcg, idx, count); rcu_read_unlock(); } +static inline void count_memcg_event_mm(struct mm_struct *mm, + enum vm_event_item idx) +{ + count_memcg_events_mm(mm, idx, 1); +} + static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { @@ -1262,6 +1270,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_current(void) return NULL; } +static inline struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio) +{ + return NULL; +} + static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) { @@ -1484,6 +1497,11 @@ static inline void count_memcg_folio_events(struct folio *folio, { } +static inline void count_memcg_events_mm(struct mm_struct *mm, + enum vm_event_item idx, unsigned long count) +{ +} + static inline void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9eb77c9007e6..d2761bf8ff32 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -32,6 +32,7 @@ struct reclaim_stat { unsigned nr_ref_keep; unsigned nr_unmap_fail; unsigned nr_lazyfree_fail; + unsigned nr_demoted; }; /* Stat data for system wide items */ -- cgit v1.2.3 From e98337d11bbdfa3e3f0fb99aa93e40f97549e0cd Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 13 Aug 2024 21:54:49 -0600 Subject: mm/contig_alloc: support __GFP_COMP Patch series "mm/hugetlb: alloc/free gigantic folios", v2. Use __GFP_COMP for gigantic folios can greatly reduce not only the amount of code but also the allocation and free time. 
Approximate LOC to mm/hugetlb.c: +60, -240 Allocate and free 500 1GB hugeTLB memory without HVO by: time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages Before After Alloc ~13s ~10s Free ~15s <1s The above magnitude generally holds for multiple x86 and arm64 CPU models. Perf profile before: Alloc - 99.99% alloc_pool_huge_folio - __alloc_fresh_hugetlb_folio - 83.23% alloc_contig_pages_noprof - 47.46% alloc_contig_range_noprof - 20.96% isolate_freepages_range 16.10% split_page - 14.10% start_isolate_page_range - 12.02% undo_isolate_page_range Free - update_and_free_pages_bulk - 87.71% free_contig_range - 76.02% free_unref_page - 41.30% free_unref_page_commit - 32.58% free_pcppages_bulk - 24.75% __free_one_page 13.96% _raw_spin_trylock 12.27% __update_and_free_hugetlb_folio Perf profile after: Alloc - 99.99% alloc_pool_huge_folio alloc_gigantic_folio - alloc_contig_pages_noprof - 59.15% alloc_contig_range_noprof - 20.72% start_isolate_page_range 20.64% prep_new_page - 17.13% undo_isolate_page_range Free - update_and_free_pages_bulk - __folio_put - __free_pages_ok 7.46% free_tail_page_prepare - 1.97% free_one_page 1.86% __free_one_page This patch (of 3): Support __GFP_COMP in alloc_contig_range(). When the flag is set, upon success the function returns a large folio prepared by prep_new_page(), rather than a range of order-0 pages prepared by split_free_pages() (which is renamed from split_map_pages()). alloc_contig_range() can be used to allocate folios larger than MAX_PAGE_ORDER, e.g., gigantic hugeTLB folios. So on the free path, free_one_page() needs to handle that by split_large_buddy(). [akpm@linux-foundation.org: fix folio_alloc_gigantic_noprof() WARN expression, per Yu Liao] Link: https://lkml.kernel.org/r/20240814035451.773331-1-yuzhao@google.com Link: https://lkml.kernel.org/r/20240814035451.773331-2-yuzhao@google.com Signed-off-by: Yu Zhao Acked-by: Zi Yan Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Frank van der Linden Signed-off-by: Andrew Morton --- include/linux/gfp.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f53f76e0b17e..03ba9563c6db 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -446,4 +446,27 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_ #endif void free_contig_range(unsigned long pfn, unsigned long nr_pages); +#ifdef CONFIG_CONTIG_ALLOC +static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, + int nid, nodemask_t *node) +{ + struct page *page; + + if (WARN_ON(!order || !(gfp & __GFP_COMP))) + return NULL; + + page = alloc_contig_pages_noprof(1 << order, gfp, nid, node); + + return page ? page_folio(page) : NULL; +} +#else +static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, + int nid, nodemask_t *node) +{ + return NULL; +} +#endif +/* This should be paired with folio_put() rather than free_contig_range(). */ +#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__)) + #endif /* __LINUX_GFP_H */ -- cgit v1.2.3 From 463586e9ff398f951a9a63d98a3b93f44434d20c Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 13 Aug 2024 21:54:50 -0600 Subject: mm/cma: add cma_{alloc,free}_folio() With alloc_contig_range() and free_contig_range() supporting large folios, CMA can allocate and free large folios too, by cma_alloc_folio() and cma_free_folio(). 
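Illustrative usage of the new helpers (a sketch; whether the caller must pass __GFP_COMP explicitly is an assumption here, and error handling is minimal):

        struct folio *folio;

        folio = cma_alloc_folio(cma, order, GFP_KERNEL | __GFP_COMP);
        if (!folio)
                return -ENOMEM;

        /* ... use the large folio ... */

        if (!cma_free_folio(cma, folio))
                pr_warn("folio did not come from this CMA area\n");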
[yuzhao@google.com: fix WARN in cma_alloc_folio()] Link: https://lkml.kernel.org/r/Zsd0PgAQmbpR8jS6@google.com Link: https://lkml.kernel.org/r/20240814035451.773331-3-yuzhao@google.com Signed-off-by: Yu Zhao Acked-by: Zi Yan Cc: Frank van der Linden Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/cma.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/cma.h b/include/linux/cma.h index 9db877506ea8..d15b64f51336 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -52,4 +52,20 @@ extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); extern void cma_reserve_pages_on_error(struct cma *cma); + +#ifdef CONFIG_CMA +struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp); +bool cma_free_folio(struct cma *cma, const struct folio *folio); +#else +static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp) +{ + return NULL; +} + +static inline bool cma_free_folio(struct cma *cma, const struct folio *folio) +{ + return false; +} +#endif + #endif -- cgit v1.2.3 From cf54f310d0d313bce6505d010a555948b0ae0e2a Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 13 Aug 2024 21:54:51 -0600 Subject: mm/hugetlb: use __GFP_COMP for gigantic folios Use __GFP_COMP for gigantic folios to greatly reduce not only the amount of code but also the allocation and free time. LOC (approximately): +60, -240 Allocate and free 500 1GB hugeTLB memory without HVO by: time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages Before After Alloc ~13s ~10s Free ~15s <1s The above magnitude generally holds for multiple x86 and arm64 CPU models. Link: https://lkml.kernel.org/r/20240814035451.773331-4-yuzhao@google.com Signed-off-by: Yu Zhao Reported-by: Frank van der Linden Acked-by: Zi Yan Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3100a52ceb73..98c47c394b89 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -896,10 +896,11 @@ static inline bool hugepage_movable_supported(struct hstate *h) /* Movability of hugepages depends on migration support. */ static inline gfp_t htlb_alloc_mask(struct hstate *h) { - if (hugepage_movable_supported(h)) - return GFP_HIGHUSER_MOVABLE; - else - return GFP_HIGHUSER; + gfp_t gfp = __GFP_COMP | __GFP_NOWARN; + + gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; + + return gfp; } static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) -- cgit v1.2.3 From d0b003ce97ad6518dea4b0ed70081df95de04179 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 16 Aug 2024 12:32:46 +0200 Subject: mm/rmap: use folio->_mapcount for small folios We have some cases left whereby we operate on small folios and still refer to page->_mapcount. Let's just use folio->_mapcount instead, which currently still overlays page->_mapcount, so no change. This change will make it easier to later spot any remaining users of page->_mapcount that target tail pages. 
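For context, this is purely cosmetic today because folio->_mapcount overlays the head page's _mapcount; an illustrative assertion of that invariant (not part of the patch) could look like:

  static inline void assert_folio_mapcount_overlay(struct folio *folio)
  {
          /* Only small (single-page) folios use this counter directly. */
          VM_WARN_ON_FOLIO(folio_test_large(folio), folio);

          /* Both names refer to the same storage while the overlay exists. */
          WARN_ON_ONCE((void *)&folio->_mapcount !=
                       (void *)&folio->page._mapcount);
  }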
Link: https://lkml.kernel.org/r/20240816103246.719209-1-david@redhat.com Signed-off-by: David Hildenbrand Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/rmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 0978c64f49d8..91b5935e8485 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -331,7 +331,7 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, switch (level) { case RMAP_LEVEL_PTE: if (!folio_test_large(folio)) { - atomic_inc(&page->_mapcount); + atomic_inc(&folio->_mapcount); break; } @@ -425,7 +425,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, if (!folio_test_large(folio)) { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); - atomic_inc(&page->_mapcount); + atomic_inc(&folio->_mapcount); break; } -- cgit v1.2.3 From 489a744e5fb16503b495ad1fa3dd1abf25b224e7 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 13 Aug 2024 00:34:35 +0200 Subject: mm: krealloc: clarify valid usage of __GFP_ZERO Properly document that if __GFP_ZERO logic is requested, callers must ensure that, starting with the initial memory allocation, every subsequent call to this API for the same memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that __GFP_ZERO is not fully honored by this API. Link: https://lkml.kernel.org/r/20240812223707.32049-2-dakr@kernel.org Signed-off-by: Danilo Krummrich Acked-by: David Rientjes Cc: Christoph Lameter Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Pekka Enberg Cc: Roman Gushchin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/slab.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index c9cb42203183..2282e67a01c7 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -733,6 +733,16 @@ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t siz * @new_n: new number of elements to alloc * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) + * + * If __GFP_ZERO logic is requested, callers must ensure that, starting with the + * initial memory allocation, every subsequent call to this API for the same + * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that + * __GFP_ZERO is not fully honored by this API. + * + * See krealloc_noprof() for further details. + * + * In any case, the contents of the object pointed to are preserved up to the + * lesser of the new and old sizes. */ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p, size_t new_n, -- cgit v1.2.3 From a759e37fb46708029c9c3c56c3b62e6f24d85cf5 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 18 Aug 2024 23:01:51 +0200 Subject: err.h: add ERR_PTR_PCPU(), PTR_ERR_PCPU() and IS_ERR_PCPU() macros Add ERR_PTR_PCPU(), PTR_ERR_PCPU() and IS_ERR_PCPU() macros that operate on pointers in the percpu address space. These macros remove the need for (__force void *) function argument casts (to avoid sparse -Wcast-from-as warnings). The patch will also avoid future build errors due to pointer address space mismatch with enabled strict percpu address space checks. 
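A hypothetical usage sketch (not from the patch) of an allocator that reports failure through the percpu address space:

  static int __percpu *alloc_counters(void)
  {
          int __percpu *ctrs = alloc_percpu(int);

          if (!ctrs)
                  return ERR_PTR_PCPU(-ENOMEM);

          return ctrs;
  }

  /* at a call site */
  int __percpu *ctrs = alloc_counters();

  if (IS_ERR_PCPU(ctrs))
          return PTR_ERR_PCPU(ctrs);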
Link: https://lkml.kernel.org/r/20240818210235.33481-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Catalin Marinas Signed-off-by: Andrew Morton --- include/linux/err.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/err.h b/include/linux/err.h index b5d9bb2a2349..a4dacd745fcf 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -41,6 +41,9 @@ static inline void * __must_check ERR_PTR(long error) return (void *) error; } +/* Return the pointer in the percpu address space. */ +#define ERR_PTR_PCPU(error) ((void __percpu *)(unsigned long)ERR_PTR(error)) + /** * PTR_ERR - Extract the error code from an error pointer. * @ptr: An error pointer. @@ -51,6 +54,9 @@ static inline long __must_check PTR_ERR(__force const void *ptr) return (long) ptr; } +/* Read an error pointer from the percpu address space. */ +#define PTR_ERR_PCPU(ptr) (PTR_ERR((const void *)(__force const unsigned long)(ptr))) + /** * IS_ERR - Detect an error pointer. * @ptr: The pointer to check. @@ -61,6 +67,9 @@ static inline bool __must_check IS_ERR(__force const void *ptr) return IS_ERR_VALUE((unsigned long)ptr); } +/* Read an error pointer from the percpu address space. */ +#define IS_ERR_PCPU(ptr) (IS_ERR((const void *)(__force const unsigned long)(ptr))) + /** * IS_ERR_OR_NULL - Detect an error pointer or a null pointer. * @ptr: The pointer to check. -- cgit v1.2.3 From ef5f379de302884b9b7ad9b62587a942a9f0bb55 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 20 Aug 2024 14:22:10 +0200 Subject: mm: always inline _compound_head() with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y We already force-inline page_fixed_fake_head(), page_is_fake_head() and PageTail(), however the compiler might decide that _compound_head() is not worthy to be inlined, because of page_fixed_fake_head(). The result is that, for example, PageAnonExclusive() now might involve a function call when checking PageHuge(), which performs a page_folio()->_compound_head() call. This can lead to a slight regression of the stress-ng.clone benchmark. This is not super-urgent to fix, but always inlining _compound_head() seems like the obvious thing to do for this primitive, similar to the other ones. This change restores the slight regression and a compilation with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y shows no relevant bloat [2]: add/remove: 15/14 grow/shrink: 79/87 up/down: 12836/-13917 (-1081) ... 
Total: Before=32786363, After=32785282, chg -0.00% [1] https://lkml.kernel.org/r/817150f2-abf7-430f-9973-540bd6cdd26f@intel.com [2] https://lore.kernel.org/all/116e117c-2821-401d-8e62-b85cdec37f4a@redhat.com/ Link: https://lkml.kernel.org/r/20240820122210.660140-1-david@redhat.com Fixes: c0bff412e67b ("mm: allow anon exclusive check over hugetlb tail pages") Signed-off-by: David Hildenbrand Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202407301049.5051dc19-oliver.sang@intel.com Tested-by: Yin Fengwei Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 17175a328e6b..2515ae85f191 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -232,7 +232,7 @@ static __always_inline int page_is_fake_head(const struct page *page) return page_fixed_fake_head(page) != page; } -static inline unsigned long _compound_head(const struct page *page) +static __always_inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); -- cgit v1.2.3 From 0a2d82946be67e02fdf85a4010606bdc0546ba44 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 20 Aug 2024 10:26:39 +0800 Subject: mm: allow read-ahead with IOCB_NOWAIT set Readahead support for IOCB_NOWAIT was introduced in commit 2e85abf053b9 ("mm: allow read-ahead with IOCB_NOWAIT set"). However, this implementation broke the semantics of IOCB_NOWAIT by potentially causing it to wait on I/O during memory reclamation. This behavior was later modified in commit efa8480a8316 ("fs: RWF_NOWAIT should imply IOCB_NOIO"). To resolve the blocking issue during memory reclamation, we can use memalloc_noio_{save,restore} to ensure non-blocking behavior. This change restores the original functionality, allowing preadv2(IOCB_NOWAIT) to trigger readahead if the file content is not present in the page cache. While this process may trigger direct memory reclamation, the __GFP_NORETRY flag is set in the readahead GFP flags, ensuring it won't block. A use case for this change is when we want to trigger readahead in the preadv2(2) syscall if the file cache is absent, but without waiting for certain filesystem locks, like xfs_ilock. A simple example is as follows: retry: if (preadv2(fd, iovec, cnt, offset, RWF_NOWAIT) < 0) { do_other_work(); goto retry; } Link: https://lore.gnuweeb.org/io-uring/20200624164127.GP21350@casper.infradead.org/ Link: https://lkml.kernel.org/r/20240820022639.89562-1-laoar.shao@gmail.com Signed-off-by: Yafang Shao Cc: Jens Axboe Cc: Matthew Wilcox Cc: Dave Chinner Cc: Jan Kara Cc: Christian Brauner Signed-off-by: Andrew Morton --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ca11e241a24..8610242ec2ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3461,7 +3461,6 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - kiocb_flags |= IOCB_NOIO; } if (flags & RWF_ATOMIC) { if (rw_type != WRITE) -- cgit v1.2.3 From e27ad6560e4b5993315b56d6884ca5a4652468f4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 18:39:09 +0100 Subject: printf: remove %pGt support Patch series "Increase the number of bits available in page_type". 
Kent wants more than 16 bits in page_type, so I resurrected this old patch and expanded it a bit. It's a bit more efficient than our current scheme (1 4-byte insn vs 3 insns of 13 bytes total) to test a single page type. This patch (of 4): An upcoming patch will convert page type from being a bitfield to a single byte, so we will not be able to use %pG to print the page type any more. The printing of the symbolic name will be restored in that patch. Link: https://lkml.kernel.org/r/20240821173914.2270383-1-willy@infradead.org Link: https://lkml.kernel.org/r/20240821173914.2270383-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/trace/events/mmflags.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b5c4da370a50..c151cc21d367 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -130,16 +130,6 @@ IF_HAVE_PG_ARCH_X(arch_3) __def_pageflag_names \ ) : "none" -#define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } - -#define __def_pagetype_names \ - DEF_PAGETYPE_NAME(slab), \ - DEF_PAGETYPE_NAME(hugetlb), \ - DEF_PAGETYPE_NAME(offline), \ - DEF_PAGETYPE_NAME(guard), \ - DEF_PAGETYPE_NAME(table), \ - DEF_PAGETYPE_NAME(buddy) - #if defined(CONFIG_X86) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } #elif defined(CONFIG_PPC) -- cgit v1.2.3 From e880034cf7182aa35962bd30dfc9fa60476d56a4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 18:39:10 +0100 Subject: mm: introduce page_mapcount_is_type() Resolve the awkward "and add one to this opaque constant" test into a self-documenting inline function. 
Link: https://lkml.kernel.org/r/20240821173914.2270383-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 +-- include/linux/page-flags.h | 12 +++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0fde51c0532f..6f6053d7ea6f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1228,8 +1228,7 @@ static inline int folio_mapcount(const struct folio *folio) if (likely(!folio_test_large(folio))) { mapcount = atomic_read(&folio->_mapcount) + 1; - /* Handle page_has_type() pages */ - if (mapcount < PAGE_MAPCOUNT_RESERVE + 1) + if (page_mapcount_is_type(mapcount)) mapcount = 0; return mapcount; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2515ae85f191..0b5484fdbca1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -956,12 +956,18 @@ enum pagetype { #define folio_test_type(folio, flag) \ ((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) -static inline int page_type_has_type(unsigned int page_type) +static inline bool page_type_has_type(int page_type) { - return (int)page_type < PAGE_MAPCOUNT_RESERVE; + return page_type < PAGE_MAPCOUNT_RESERVE; } -static inline int page_has_type(const struct page *page) +/* This takes a mapcount which is one more than page->_mapcount */ +static inline bool page_mapcount_is_type(unsigned int mapcount) +{ + return page_type_has_type(mapcount - 1); +} + +static inline bool page_has_type(const struct page *page) { return page_type_has_type(READ_ONCE(page->page_type)); } -- cgit v1.2.3 From 4ffca5a96678c9fe1a39ec1e9c8fd326b57ea8a7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 18:39:11 +0100 Subject: mm: support only one page_type per page By using a few values in the top byte, users of page_type can store up to 24 bits of additional data in page_type. It also reduces the code size as (with replacement of READ_ONCE() with data_race()), the kernel can check just a single byte. eg: ffffffff811e3a79: 8b 47 30 mov 0x30(%rdi),%eax ffffffff811e3a7c: 55 push %rbp ffffffff811e3a7d: 48 89 e5 mov %rsp,%rbp ffffffff811e3a80: 25 00 00 00 82 and $0x82000000,%eax ffffffff811e3a85: 3d 00 00 00 80 cmp $0x80000000,%eax ffffffff811e3a8a: 74 4d je ffffffff811e3ad9 becomes: ffffffff811e3a69: 80 7f 33 f5 cmpb $0xf5,0x33(%rdi) ffffffff811e3a6d: 55 push %rbp ffffffff811e3a6e: 48 89 e5 mov %rsp,%rbp ffffffff811e3a71: 74 4d je ffffffff811e3ac0 replacing three instructions with one. [wangkefeng.wang@huawei.com: fix ubsan warnings] Link: https://lkml.kernel.org/r/2d19c48a-c550-4345-bf36-d05cd303c5de@huawei.com Link: https://lkml.kernel.org/r/20240821173914.2270383-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 68 +++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0b5484fdbca1..7a1b2948b075 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -923,42 +923,29 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* - * For pages that are never mapped to userspace, - * page_type may be used. 
Because it is initialised to -1, we invert the - * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and - * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and - * low bits so that an underflow or overflow of _mapcount won't be - * mistaken for a page type value. + * For pages that do not use mapcount, page_type may be used. + * The low 24 bits of pagetype may be used for your own purposes, as long + * as you are careful to not affect the top 8 bits. The low bits of + * pagetype will be overwritten when you clear the page_type from the page. */ - enum pagetype { - PG_buddy = 0x40000000, - PG_offline = 0x20000000, - PG_table = 0x10000000, - PG_guard = 0x08000000, - PG_hugetlb = 0x04000000, - PG_slab = 0x02000000, - PG_zsmalloc = 0x01000000, - PG_unaccepted = 0x00800000, - - PAGE_TYPE_BASE = 0x80000000, - - /* - * Reserve 0xffff0000 - 0xfffffffe to catch _mapcount underflows and - * allow owners that set a type to reuse the lower 16 bit for their own - * purposes. - */ - PAGE_MAPCOUNT_RESERVE = ~0x0000ffff, + /* 0x00-0x7f are positive numbers, ie mapcount */ + /* Reserve 0x80-0xef for mapcount overflow. */ + PGTY_buddy = 0xf0, + PGTY_offline = 0xf1, + PGTY_table = 0xf2, + PGTY_guard = 0xf3, + PGTY_hugetlb = 0xf4, + PGTY_slab = 0xf5, + PGTY_zsmalloc = 0xf6, + PGTY_unaccepted = 0xf7, + + PGTY_mapcount_underflow = 0xff }; -#define PageType(page, flag) \ - ((READ_ONCE(page->page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) -#define folio_test_type(folio, flag) \ - ((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) - static inline bool page_type_has_type(int page_type) { - return page_type < PAGE_MAPCOUNT_RESERVE; + return page_type < (PGTY_mapcount_underflow << 24); } /* This takes a mapcount which is one more than page->_mapcount */ @@ -969,40 +956,41 @@ static inline bool page_mapcount_is_type(unsigned int mapcount) static inline bool page_has_type(const struct page *page) { - return page_type_has_type(READ_ONCE(page->page_type)); + return page_mapcount_is_type(data_race(page->page_type)); } #define FOLIO_TYPE_OPS(lname, fname) \ -static __always_inline bool folio_test_##fname(const struct folio *folio)\ +static __always_inline bool folio_test_##fname(const struct folio *folio) \ { \ - return folio_test_type(folio, PG_##lname); \ + return data_race(folio->page.page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ - VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ - folio->page.page_type &= ~PG_##lname; \ + VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \ + folio); \ + folio->page.page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ - folio->page.page_type |= PG_##lname; \ + folio->page.page_type = UINT_MAX; \ } #define PAGE_TYPE_OPS(uname, lname, fname) \ FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ - return PageType(page, PG_##lname); \ + return data_race(page->page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ - VM_BUG_ON_PAGE(!PageType(page, 0), page); \ - page->page_type &= ~PG_##lname; \ + VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \ + page->page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ 
VM_BUG_ON_PAGE(!Page##uname(page), page); \ - page->page_type |= PG_##lname; \ + page->page_type = UINT_MAX; \ } /* -- cgit v1.2.3 From bf03c806993091d8fb2fc0e98f07a8ef251c78f3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:34 +0100 Subject: mm: remove PageActive Patch series "Simplify the page flags a little". In the course of our folio conversions, we have made many page flags only used on folios, so we can now remove the page-based accessors. This should cut down compile time a little, and prevent new users from cropping up. There is more that could be done in this area, but it would produce merge conflicts, so I'll sit on those patches until next merge window. We now have line of sight to removing PG_private_2 and PG_private. This patch (of 10): This flag is now only used on folios, so we can remove all the page accessors. [akpm@linux-foundation.org: fix arch/powerpc/mm/pgtable-frag.c] Link: https://lkml.kernel.org/r/20240821193445.2294269-1-willy@infradead.org Link: https://lkml.kernel.org/r/20240821193445.2294269-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 7a1b2948b075..34347bee1217 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -510,8 +510,9 @@ PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) TESTCLEARFLAG(LRU, lru, PF_HEAD) -PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) - TESTCLEARFLAG(Active, active, PF_HEAD) +FOLIO_FLAG(active, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) PAGEFLAG(Workingset, workingset, PF_HEAD) TESTCLEARFLAG(Workingset, workingset, PF_HEAD) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ -- cgit v1.2.3 From 0b7582803649e0e58e5f8c9a4ede362777bc7eec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:35 +0100 Subject: mm: remove PageSwapBacked This flag is now only used on folios, so we can remove all the page accessors. 
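As a reminder of the calling convention that remains after these conversions, FOLIO_FLAG()/__FOLIO_SET_FLAG()/__FOLIO_CLEAR_FLAG() keep generating the folio_* accessors while the Page* wrappers disappear; a purely illustrative sketch, assuming a folio is already in hand:

  static void folio_flag_usage_sketch(struct folio *folio)
  {
          __folio_set_swapbacked(folio);          /* non-atomic set */

          if (folio_test_swapbacked(folio))       /* test */
                  folio_set_active(folio);        /* atomic set */
  }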
Link: https://lkml.kernel.org/r/20240821193445.2294269-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 34347bee1217..cebb3dc0fd0c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -528,9 +528,9 @@ PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) -PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) - __CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) - __SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) +FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(swapbacked, FOLIO_HEAD_PAGE) + __FOLIO_SET_FLAG(swapbacked, FOLIO_HEAD_PAGE) /* * Private page markings that may be used by the filesystem that owns the page -- cgit v1.2.3 From 6f394ee9ddb4bd1879e42c9c8648a37f650bea99 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:36 +0100 Subject: mm: remove PageReadahead This flag is now only used on folios, so we can remove all the page accessors. Link: https://lkml.kernel.org/r/20240821193445.2294269-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index cebb3dc0fd0c..71444223d630 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -553,8 +553,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) -PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) - TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) +FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(readahead, FOLIO_HEAD_PAGE) #ifdef CONFIG_HIGHMEM /* -- cgit v1.2.3 From 32f51ead3d7771cdec29f75e08d50a76d2c6253d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:37 +0100 Subject: mm: remove PageSwapCache This flag is now only used on folios, so we can remove all the page accessors and reword the comments that refer to them. Link: https://lkml.kernel.org/r/20240821193445.2294269-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 2 +- include/linux/page-flags.h | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2419e60c9a7f..6e3bdf8e38bc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -109,7 +109,7 @@ struct page { /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. - * Used for swp_entry_t if PageSwapCache. + * Used for swp_entry_t if swapcache flag set. * Indicates order in the buddy system if PageBuddy. 
*/ unsigned long private; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 71444223d630..b31d2c50b6f0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -574,15 +574,10 @@ static __always_inline bool folio_test_swapcache(const struct folio *folio) test_bit(PG_swapcache, const_folio_flags(folio, 0)); } -static __always_inline bool PageSwapCache(const struct page *page) -{ - return folio_test_swapcache(page_folio(page)); -} - -SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) -CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) +FOLIO_SET_FLAG(swapcache, FOLIO_HEAD_PAGE) +FOLIO_CLEAR_FLAG(swapcache, FOLIO_HEAD_PAGE) #else -PAGEFLAG_FALSE(SwapCache, swapcache) +FOLIO_FLAG_FALSE(swapcache) #endif PAGEFLAG(Unevictable, unevictable, PF_HEAD) -- cgit v1.2.3 From cb29e7941d5d57576392a52c08077ef327ec8e17 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:38 +0100 Subject: mm: remove PageUnevictable There is only one caller of PageUnevictable() left; convert it to call folio_test_unevictable() and remove all the page accessors. Link: https://lkml.kernel.org/r/20240821193445.2294269-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b31d2c50b6f0..2150502195d5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -580,9 +580,9 @@ FOLIO_CLEAR_FLAG(swapcache, FOLIO_HEAD_PAGE) FOLIO_FLAG_FALSE(swapcache) #endif -PAGEFLAG(Unevictable, unevictable, PF_HEAD) - __CLEARPAGEFLAG(Unevictable, unevictable, PF_HEAD) - TESTCLEARFLAG(Unevictable, unevictable, PF_HEAD) +FOLIO_FLAG(unevictable, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) #ifdef CONFIG_MMU PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) -- cgit v1.2.3 From 99f86bbda31790f2ca0e365f02650585536929ab Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:39 +0100 Subject: mm: remove PageMlocked This flag is now only used on folios, so we can remove all the page accessors. 
Link: https://lkml.kernel.org/r/20240821193445.2294269-7-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2150502195d5..08cce16c82aa 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -585,12 +585,15 @@ FOLIO_FLAG(unevictable, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) #ifdef CONFIG_MMU -PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) - __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) - TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) +FOLIO_FLAG(mlocked, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) + FOLIO_TEST_SET_FLAG(mlocked, FOLIO_HEAD_PAGE) #else -PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) - TESTSCFLAG_FALSE(Mlocked, mlocked) +FOLIO_FLAG_FALSE(mlocked) + __FOLIO_CLEAR_FLAG_NOOP(mlocked) + FOLIO_TEST_CLEAR_FLAG_FALSE(mlocked) + FOLIO_TEST_SET_FLAG_FALSE(mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED -- cgit v1.2.3 From 3026bc1e82b695d69cde21712512922abe74aab9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:40 +0100 Subject: mm: remove PageOwnerPriv1 While there are many aliases for this flag, nobody actually uses the *PageOwnerPriv1() nor folio_*_owner_priv_1() accessors. Remove them. Link: https://lkml.kernel.org/r/20240821193445.2294269-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 08cce16c82aa..458d2dc5124d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -539,8 +539,6 @@ FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) */ PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) -PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) - TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) /* * Only test-and-set exist for PG_writeback. The unconditional operators are -- cgit v1.2.3 From 6dc151388e44957d471633ce70d72e3d27ae836d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:41 +0100 Subject: mm: remove page_has_private() This function has no more callers, except folio_has_private(). Combine the two functions. Link: https://lkml.kernel.org/r/20240821193445.2294269-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 458d2dc5124d..017205048cab 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1175,20 +1175,15 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) /** - * page_has_private - Determine if page has private stuff - * @page: The page to be checked + * folio_has_private - Determine if folio has private stuff + * @folio: The folio to be checked * - * Determine if a page has private stuff, indicating that release routines + * Determine if a folio has private stuff, indicating that release routines * should be invoked upon it. 
*/ -static inline int page_has_private(const struct page *page) +static inline int folio_has_private(const struct folio *folio) { - return !!(page->flags & PAGE_FLAGS_PRIVATE); -} - -static inline bool folio_has_private(const struct folio *folio) -{ - return page_has_private(&folio->page); + return !!(folio->flags & PAGE_FLAGS_PRIVATE); } #undef PF_ANY -- cgit v1.2.3 From 02e1960aafac33721401dcd92e915325fdb524b2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:42 +0100 Subject: mm: rename PG_mappedtodisk to PG_owner_2 This flag has similar constraints to PG_owner_priv_1 -- it is ignored by core code, and is entirely for the use of the code which allocated the folio. Since the pagecache does not use it, individual filesystems can use it. The bufferhead code does use it, so filesystems which use the buffer cache must not use it for another purpose. Link: https://lkml.kernel.org/r/20240821193445.2294269-10-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/kernel-page-flags.h | 2 +- include/linux/page-flags.h | 24 ++++++++++++++++-------- include/trace/events/mmflags.h | 2 +- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index 859f4b0c1b2b..7c587a711be1 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -10,7 +10,7 @@ */ #define KPF_RESERVED 32 #define KPF_MLOCKED 33 -#define KPF_MAPPEDTODISK 34 +#define KPF_OWNER_2 34 #define KPF_PRIVATE 35 #define KPF_PRIVATE_2 36 #define KPF_OWNER_PRIVATE 37 diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 017205048cab..1ff3d172c22c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -101,12 +101,12 @@ enum pageflags { PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, - PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ + PG_owner_priv_1, /* Owner use. If pagecache, fs may use */ + PG_owner_2, /* Owner use. If pagecache, fs may use */ PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ - PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ @@ -131,6 +131,11 @@ enum pageflags { PG_readahead = PG_reclaim, + /* Anonymous memory (and shmem) */ + PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ + /* Some filesystems */ + PG_checked = PG_owner_priv_1, + /* * Depending on the way an anonymous folio can be mapped into a page * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped @@ -138,13 +143,13 @@ enum pageflags { * tail pages of an anonymous folio. For now, we only expect it to be * set on tail pages for PTE-mapped THP. */ - PG_anon_exclusive = PG_mappedtodisk, - - /* Filesystems */ - PG_checked = PG_owner_priv_1, + PG_anon_exclusive = PG_owner_2, - /* SwapBacked */ - PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ + /* + * Set if all buffer heads in the folio are mapped. + * Filesystems which do not use BHs can use it for their own purpose. + */ + PG_mappedtodisk = PG_owner_2, /* Two page bits are conscripted by FS-Cache to maintain local caching * state. 
These bits are set on pages belonging to the netfs's inodes @@ -540,6 +545,9 @@ FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) +/* owner_2 can be set on tail pages for anon memory */ +FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE) + /* * Only test-and-set exist for PG_writeback. The unconditional operators are * risky: they bypass page accounting. diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index c151cc21d367..3b51558cdc9b 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -107,13 +107,13 @@ DEF_PAGEFLAG_NAME(active), \ DEF_PAGEFLAG_NAME(workingset), \ DEF_PAGEFLAG_NAME(owner_priv_1), \ + DEF_PAGEFLAG_NAME(owner_2), \ DEF_PAGEFLAG_NAME(arch_1), \ DEF_PAGEFLAG_NAME(reserved), \ DEF_PAGEFLAG_NAME(private), \ DEF_PAGEFLAG_NAME(private_2), \ DEF_PAGEFLAG_NAME(writeback), \ DEF_PAGEFLAG_NAME(head), \ - DEF_PAGEFLAG_NAME(mappedtodisk), \ DEF_PAGEFLAG_NAME(reclaim), \ DEF_PAGEFLAG_NAME(swapbacked), \ DEF_PAGEFLAG_NAME(unevictable) \ -- cgit v1.2.3 From 7a87225ae2c6c317c7b80cf599e5cf0eee699196 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:43 +0100 Subject: x86: remove PG_uncached Convert x86 to use PG_arch_2 instead of PG_uncached and remove PG_uncached. Link: https://lkml.kernel.org/r/20240821193445.2294269-11-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/kernel-page-flags.h | 1 - include/linux/page-flags.h | 13 +++---------- include/trace/events/mmflags.h | 23 +++++++++++------------ 3 files changed, 14 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index 7c587a711be1..196778a087c4 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -15,7 +15,6 @@ #define KPF_PRIVATE_2 36 #define KPF_OWNER_PRIVATE 37 #define KPF_ARCH 38 -#define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 #define KPF_ARCH_2 41 #define KPF_ARCH_3 42 diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1ff3d172c22c..2175ebceb41c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -113,9 +113,6 @@ enum pageflags { #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED - PG_uncached, /* Page has been mapped as uncached */ -#endif #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. 
Don't touch */ #endif @@ -123,8 +120,10 @@ enum pageflags { PG_young, PG_idle, #endif -#ifdef CONFIG_ARCH_USES_PG_ARCH_X +#ifdef CONFIG_ARCH_USES_PG_ARCH_2 PG_arch_2, +#endif +#ifdef CONFIG_ARCH_USES_PG_ARCH_3 PG_arch_3, #endif __NR_PAGEFLAGS, @@ -602,12 +601,6 @@ FOLIO_FLAG_FALSE(mlocked) FOLIO_TEST_SET_FLAG_FALSE(mlocked) #endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED -PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) -#else -PAGEFLAG_FALSE(Uncached, uncached) -#endif - #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 3b51558cdc9b..58f2699331b6 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -71,12 +71,6 @@ #define IF_HAVE_PG_MLOCK(_name) #endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED -#define IF_HAVE_PG_UNCACHED(_name) ,{1UL << PG_##_name, __stringify(_name)} -#else -#define IF_HAVE_PG_UNCACHED(_name) -#endif - #ifdef CONFIG_MEMORY_FAILURE #define IF_HAVE_PG_HWPOISON(_name) ,{1UL << PG_##_name, __stringify(_name)} #else @@ -89,10 +83,16 @@ #define IF_HAVE_PG_IDLE(_name) #endif -#ifdef CONFIG_ARCH_USES_PG_ARCH_X -#define IF_HAVE_PG_ARCH_X(_name) ,{1UL << PG_##_name, __stringify(_name)} +#ifdef CONFIG_ARCH_USES_PG_ARCH_2 +#define IF_HAVE_PG_ARCH_2(_name) ,{1UL << PG_##_name, __stringify(_name)} +#else +#define IF_HAVE_PG_ARCH_2(_name) +#endif + +#ifdef CONFIG_ARCH_USES_PG_ARCH_3 +#define IF_HAVE_PG_ARCH_3(_name) ,{1UL << PG_##_name, __stringify(_name)} #else -#define IF_HAVE_PG_ARCH_X(_name) +#define IF_HAVE_PG_ARCH_3(_name) #endif #define DEF_PAGEFLAG_NAME(_name) { 1UL << PG_##_name, __stringify(_name) } @@ -118,12 +118,11 @@ DEF_PAGEFLAG_NAME(swapbacked), \ DEF_PAGEFLAG_NAME(unevictable) \ IF_HAVE_PG_MLOCK(mlocked) \ -IF_HAVE_PG_UNCACHED(uncached) \ IF_HAVE_PG_HWPOISON(hwpoison) \ IF_HAVE_PG_IDLE(idle) \ IF_HAVE_PG_IDLE(young) \ -IF_HAVE_PG_ARCH_X(arch_2) \ -IF_HAVE_PG_ARCH_X(arch_3) +IF_HAVE_PG_ARCH_2(arch_2) \ +IF_HAVE_PG_ARCH_3(arch_3) #define show_page_flags(flags) \ (flags) ? __print_flags(flags, "|", \ -- cgit v1.2.3 From 0ca0c24e3211586d44a31b3c4767fe3f43a008a7 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 23 Aug 2024 20:04:39 +0100 Subject: mm: store zero pages to be swapped out in a bitmap Patch series "mm: store zero pages to be swapped out in a bitmap", v8. As shown in the patch series that introduced the zswap same-filled optimization [1], 10-20% of the pages stored in zswap are same-filled. This is also observed across Meta's server fleet. By using VM counters in swap_writepage (not included in this patchseries) it was found that less than 1% of the same-filled pages to be swapped out are non-zero pages. For conventional swap setup (without zswap), rather than reading/writing these pages to flash resulting in increased I/O and flash wear, a bitmap can be used to mark these pages as zero at write time, and the pages can be filled at read time if the bit corresponding to the page is set. When using zswap with swap, this also means that a zswap_entry does not need to be allocated for zero filled pages resulting in memory savings which would offset the memory used for the bitmap. A similar attempt was made earlier in [2] where zswap would only track zero-filled pages instead of same-filled. This patchseries adds zero-filled pages optimization to swap (hence it can be used even if zswap is disabled) and removes the same-filled code from zswap (as only 1% of the same-filled pages are non-zero), simplifying code. 
[1] https://lore.kernel.org/all/20171018104832epcms5p1b2232e2236258de3d03d1344dde9fce0@epcms5p1/ [2] https://lore.kernel.org/lkml/20240325235018.2028408-1-yosryahmed@google.com/ This patch (of 2): Approximately 10-20% of pages to be swapped out are zero pages [1]. Rather than reading/writing these pages to flash resulting in increased I/O and flash wear, a bitmap can be used to mark these pages as zero at write time, and the pages can be filled at read time if the bit corresponding to the page is set. With this patch, NVMe writes in Meta server fleet decreased by almost 10% with conventional swap setup (zswap disabled). [1] https://lore.kernel.org/all/20171018104832epcms5p1b2232e2236258de3d03d1344dde9fce0@epcms5p1/ Link: https://lkml.kernel.org/r/20240823190545.979059-1-usamaarif642@gmail.com Link: https://lkml.kernel.org/r/20240823190545.979059-2-usamaarif642@gmail.com Signed-off-by: Usama Arif Reviewed-by: Chengming Zhou Reviewed-by: Yosry Ahmed Reviewed-by: Nhat Pham Acked-by: Johannes Weiner Cc: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Shakeel Butt Cc: Usama Arif Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 248db1dd7812..ca533b478c21 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -296,6 +296,7 @@ struct swap_info_struct { signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ + unsigned long *zeromap; /* kvmalloc'ed bitmap to track zero pages */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct list_head free_clusters; /* free clusters list */ struct list_head full_clusters; /* full clusters list */ -- cgit v1.2.3 From 63fc66f5b6b18f39269a66cf34d8cb7a24fbfe88 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 30 Aug 2024 00:00:58 -0400 Subject: ipc/shm, mm: drop do_vma_munmap() The do_vma_munmap() wrapper existed for callers that didn't have a vma iterator and needed to check the vma mseal status prior to calling the underlying munmap(). All callers now use a vma iterator and since the mseal check has been moved to do_vmi_align_munmap() and the vmas are aligned, this function can just be called instead. do_vmi_align_munmap() can no longer be static as ipc/shm is using it and it is exported via the mm.h header. Link: https://lkml.kernel.org/r/20240830040101.822209-19-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Reviewed-by: Lorenzo Stoakes Cc: Bert Karwatzki Cc: Jeff Xu Cc: Jiri Olsa Cc: Kees Cook Cc: Lorenzo Stoakes Cc: Mark Brown Cc: Matthew Wilcox Cc: "Paul E. 
McKenney" Cc: Paul Moore Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6f6053d7ea6f..b0ff06d18c71 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3287,14 +3287,14 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr, extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock); +int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, + struct mm_struct *mm, unsigned long start, + unsigned long end, struct list_head *uf, bool unlock); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); #ifdef CONFIG_MMU -extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct list_head *uf, bool unlock); extern int __mm_populate(unsigned long addr, unsigned long len, int ignore_errors); static inline void mm_populate(unsigned long addr, unsigned long len) -- cgit v1.2.3 From f1264e9531b0fbadf88e0b9c8143c546343b481c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 27 Aug 2024 19:47:27 +0800 Subject: mm: migrate: add isolate_folio_to_list() Add isolate_folio_to_list() helper to try to isolate HugeTLB, no-LRU movable and LRU folios to a list, which will be reused by do_migrate_range() from memory hotplug soon, also drop the mf_isolate_folio() since we could directly use new helper in the soft_offline_in_use_page(). Link: https://lkml.kernel.org/r/20240827114728.3212578-5-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: David Hildenbrand Acked-by: Miaohe Lin Tested-by: Miaohe Lin Cc: Dan Carpenter Cc: Jonathan Cameron Cc: Naoya Horiguchi Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/migrate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 644be30b69c8..002e49b2ebd9 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -70,6 +70,7 @@ int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free, unsigned int *ret_succeeded); struct folio *alloc_migration_target(struct folio *src, unsigned long private); bool isolate_movable_page(struct page *page, isolate_mode_t mode); +bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); @@ -91,6 +92,8 @@ static inline struct folio *alloc_migration_target(struct folio *src, { return NULL; } static inline bool isolate_movable_page(struct page *page, isolate_mode_t mode) { return false; } +static inline bool isolate_folio_to_list(struct folio *folio, struct list_head *list) + { return false; } static inline int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src) -- cgit v1.2.3 From d29e741cad3f8f41df1834bf74df79380c1c6c6d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Aug 2024 17:17:04 +0200 Subject: gpio: davinci: drop platform data support There are no more any board files that use the platform data for gpio-davinci. 
We can remove the header defining it and port the code to no longer store any context in pdata. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240819151705.37258-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/gpio-davinci.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/linux/platform_data/gpio-davinci.h (limited to 'include') diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h deleted file mode 100644 index b82e44662efe..000000000000 --- a/include/linux/platform_data/gpio-davinci.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * DaVinci GPIO Platform Related Defines - * - * Copyright (C) 2013 Texas Instruments Incorporated - https://www.ti.com/ - */ - -#ifndef __DAVINCI_GPIO_PLATFORM_H -#define __DAVINCI_GPIO_PLATFORM_H - -struct davinci_gpio_platform_data { - bool no_auto_base; - u32 base; - u32 ngpio; - u32 gpio_unbanked; -}; - -/* Convert GPIO signal to GPIO pin number */ -#define GPIO_TO_PIN(bank, gpio) (16 * (bank) + (gpio)) - -#endif -- cgit v1.2.3 From 8b1451f2f723f845c05b8bad3d4c45de284338b5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 3 Sep 2024 21:54:28 -1000 Subject: sched_ext: Replace SCX_TASK_BAL_KEEP with SCX_RQ_BAL_KEEP SCX_TASK_BAL_KEEP is used by balance_one() to tell pick_next_task_scx() to keep running the current task. It's not really a task property. Replace it with SCX_RQ_BAL_KEEP which resides in rq->scx.flags and is a better fit for the usage. Also, the existing clearing rule is unnecessarily strict and makes it difficult to use with core-sched. Just clear it on entry to balance_one(). Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 69f68e2121a8..db2a266113ac 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -71,7 +71,6 @@ struct scx_dispatch_q { /* scx_entity.flags */ enum scx_ent_flags { SCX_TASK_QUEUED = 1 << 0, /* on ext runqueue */ - SCX_TASK_BAL_KEEP = 1 << 1, /* balance decided to keep current */ SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */ SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */ -- cgit v1.2.3 From 47f218d108950984b24af81f66356ceda380eb74 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:17 -0300 Subject: iommu/amd: Store the nid in io_pgtable_cfg instead of the domain We already have memory in the union here that is being wasted in AMD's case, use it to store the nid. Putting the nid here further isolates the io_pgtable code from the struct protection_domain. Fixup protection_domain_alloc so that the NID from the device is provided, at this point dev is never NULL for AMD so this will now allocate the first table pointer on the correct NUMA node. 
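A sketch of how the io-pgtable code can honour the stored node when allocating table memory (illustrative helper, not the driver's actual function):

  static void *alloc_pgtable_page(struct io_pgtable_cfg *cfg, gfp_t gfp)
  {
          struct page *page;

          /* First-touch the page table memory on the device's NUMA node. */
          page = alloc_pages_node(cfg->amd.nid, gfp | __GFP_ZERO, 0);

          return page ? page_address(page) : NULL;
  }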
Signed-off-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/8-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index f9a81761bfce..b1ecfc3cd5bc 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -171,6 +171,10 @@ struct io_pgtable_cfg { u64 ttbr[4]; u32 n_ttbrs; } apple_dart_cfg; + + struct { + int nid; + } amd; }; }; -- cgit v1.2.3 From b6ecc662037694488bfff7c9fd21c405df8411f2 Mon Sep 17 00:00:00 2001 From: Souradeep Chakrabarti Date: Mon, 2 Sep 2024 05:43:47 -0700 Subject: net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup Currently napi_disable() gets called during rxq and txq cleanup, even before napi is enabled and hrtimer is initialized. It causes kernel panic. ? page_fault_oops+0x136/0x2b0 ? page_counter_cancel+0x2e/0x80 ? do_user_addr_fault+0x2f2/0x640 ? refill_obj_stock+0xc4/0x110 ? exc_page_fault+0x71/0x160 ? asm_exc_page_fault+0x27/0x30 ? __mmdrop+0x10/0x180 ? __mmdrop+0xec/0x180 ? hrtimer_active+0xd/0x50 hrtimer_try_to_cancel+0x2c/0xf0 hrtimer_cancel+0x15/0x30 napi_disable+0x65/0x90 mana_destroy_rxq+0x4c/0x2f0 mana_create_rxq.isra.0+0x56c/0x6d0 ? mana_uncfg_vport+0x50/0x50 mana_alloc_queues+0x21b/0x320 ? skb_dequeue+0x5f/0x80 Cc: stable@vger.kernel.org Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") Signed-off-by: Souradeep Chakrabarti Reviewed-by: Haiyang Zhang Reviewed-by: Shradha Gupta Signed-off-by: David S. Miller --- include/net/mana/mana.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 7caa334f4888..b8a6c7504ee1 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -98,6 +98,8 @@ struct mana_txq { atomic_t pending_sends; + bool napi_initialized; + struct mana_stats_tx stats; }; -- cgit v1.2.3 From 9f82f15ddfdd60bb9820f09737333b2345e22ab3 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 22 Aug 2024 16:10:46 +0100 Subject: mm: use ARCH_PKEY_BITS to define VM_PKEY_BITN Use the new CONFIG_ARCH_PKEY_BITS to simplify setting these bits for different architectures. 
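The fix records whether NAPI was actually brought up for the queue and only tears it down in that case; roughly (the 'napi' pointer is illustrative, in mana the NAPI instance lives in the queue's completion queue):

  if (txq->napi_initialized) {
          napi_synchronize(napi);
          napi_disable(napi);
          netif_napi_del(napi);
          txq->napi_initialized = false;
  }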
Signed-off-by: Joey Gouly Cc: Andrew Morton Cc: linux-fsdevel@vger.kernel.org Cc: linux-mm@kvack.org Acked-by: Dave Hansen Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20240822151113.1479789-4-joey.gouly@arm.com Signed-off-by: Will Deacon --- include/linux/mm.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..fb6ccd93f589 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -330,12 +330,16 @@ extern unsigned int kobjsize(const void *objp); #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS -# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 -# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 /* A protection key is a 4-bit value */ -# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 /* on x86 and 5-bit value on ppc64 */ -# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 -# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 -#ifdef CONFIG_PPC +# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 +# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 +# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 +# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 +#if CONFIG_ARCH_PKEY_BITS > 3 +# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 +#else +# define VM_PKEY_BIT3 0 +#endif +#if CONFIG_ARCH_PKEY_BITS > 4 # define VM_PKEY_BIT4 VM_HIGH_ARCH_4 #else # define VM_PKEY_BIT4 0 -- cgit v1.2.3 From facaa1373c9aabf8e34109a9cb205ad0f3a8584e Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 22 Aug 2024 16:10:55 +0100 Subject: arm64: re-order MTE VM_ flags VM_PKEY_BIT[012] will use VM_HIGH_ARCH_[012], move the MTE VM flags to accommodate this. Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20240822151113.1479789-13-joey.gouly@arm.com Signed-off-by: Will Deacon --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index fb6ccd93f589..406512c16471 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -378,8 +378,8 @@ extern unsigned int kobjsize(const void *objp); #endif #if defined(CONFIG_ARM64_MTE) -# define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */ -# define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */ +# define VM_MTE VM_HIGH_ARCH_4 /* Use Tagged memory for access control */ +# define VM_MTE_ALLOWED VM_HIGH_ARCH_5 /* Tagged memory permitted */ #else # define VM_MTE VM_NONE # define VM_MTE_ALLOWED VM_NONE -- cgit v1.2.3 From 17519819926211e6b2834e00e4554bec0daf22ac Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 22 Aug 2024 16:11:03 +0100 Subject: arm64/ptrace: add support for FEAT_POE Add a regset for POE containing POR_EL0. 
Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20240822151113.1479789-21-joey.gouly@arm.com Signed-off-by: Will Deacon --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b54b313bcf07..81762ff3c99e 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -441,6 +441,7 @@ typedef struct elf64_shdr { #define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ #define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ +#define NT_ARM_POE 0x40f /* ARM POE registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From c6fb88a5270f4ba24859f5ecb61cfc731ef16300 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 4 Sep 2024 21:51:50 +0900 Subject: firewire: core: allocate workqueue to handle isochronous contexts in card This commit adds a workqueue dedicated for isochronous context processing. The workqueue is allocated per instance of fw_card structure to satisfy the following characteristics descending from 1394 OHCI specification: In 1394 OHCI specification, memory pages are reserved to each isochronous context dedicated to DMA transmission. It allows to operate these per-context pages concurrently. Software can schedule hardware interrupt for several isochronous context to the same cycle, thus WQ_UNBOUND is specified. Additionally, it is sleepable to operate the content of pages, thus WQ_BH is not used. The isochronous context delivers the packets with time stamp, thus WQ_HIGHPRI is specified for semi real-time data such as IEC 61883-1/6 protocol implemented by ALSA firewire stack. The isochronous context is not used by the implementation of SCSI over IEEE1394 protocol (sbp2), thus WQ_MEM_RECLAIM is not specified. It is useful for users to adjust cpu affinity of the workqueue depending on their work loads, thus WQ_SYS is specified to expose the attributes to user space. Tested-by: Edmund Raile Link: https://lore.kernel.org/r/20240904125155.461886-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 1cca14cf5652..10e135d60824 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -134,6 +134,8 @@ struct fw_card { __be32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; __be32 maint_utility_register; + + struct workqueue_struct *isoc_wq; }; static inline struct fw_card *fw_card_get(struct fw_card *card) -- cgit v1.2.3 From 4f55ad754d6b7b6fa4ebcfd8c50f7e53e661a78e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 4 Sep 2024 21:51:51 +0900 Subject: firewire: core: add local API to queue work item to workqueue specific to isochronous contexts In the previous commit, the workqueue is added per the instance of fw_card structure for isochronous contexts. The workqueue is designed to be used by the implementation of fw_card_driver structure underlying the fw_card. This commit adds some local APIs to be used by the implementation. 
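A hypothetical userspace sketch of reading the new regset with PTRACE_GETREGSET (treating the payload as a single u64 holding POR_EL0 is an assumption made for illustration):

  #include <stdint.h>
  #include <sys/ptrace.h>
  #include <sys/types.h>
  #include <sys/uio.h>
  #include <linux/elf.h>

  static long read_por_el0(pid_t pid, uint64_t *por)
  {
          struct iovec iov = {
                  .iov_base = por,
                  .iov_len  = sizeof(*por),
          };

          return ptrace(PTRACE_GETREGSET, pid, (void *)NT_ARM_POE, &iov);
  }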
Tested-by: Edmund Raile Link: https://lore.kernel.org/r/20240904125155.461886-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 10e135d60824..72f497b61739 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -511,6 +511,7 @@ union fw_iso_callback { struct fw_iso_context { struct fw_card *card; + struct work_struct work; int type; int channel; int speed; -- cgit v1.2.3 From bd07d864522e7c3e4ee364e91aee8754992f5855 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:20 +0206 Subject: printk: nbcon: Add function for printers to reacquire ownership Since ownership can be lost at any time due to handover or takeover, a printing context _must_ be prepared to back out immediately and carefully. However, there are scenarios where the printing context must reacquire ownership in order to finalize or revert hardware changes. One such example is when interrupts are disabled during printing. No other context will automagically re-enable the interrupts. For this case, the disabling context _must_ reacquire nbcon ownership so that it can re-enable the interrupts. Provide nbcon_reacquire_nobuf() for exactly this purpose. It allows a printing context to reacquire ownership using the same priority as its previous ownership. Note that after a successful reacquire the printing context will have no output buffer because that has been lost. This function cannot be used to resume printing. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-2-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 9a13f91b0c43..88050d30a9cc 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -366,6 +366,10 @@ struct console { * * The callback should allow the takeover whenever it is safe. It * increases the chance to see messages when the system is in trouble. + * If the driver must reacquire ownership in order to finalize or + * revert hardware changes, nbcon_reacquire_nobuf() can be used. + * However, on reacquire the buffer content is no longer available. A + * reacquire cannot be used to resume printing. * * The callback can be called from any context (including NMI). 
* Therefore it must avoid usage of any locking and instead rely @@ -558,12 +562,14 @@ extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +extern void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt); #else static inline void nbcon_cpu_emergency_enter(void) { } static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { } #endif extern int console_set_on_cmdline; -- cgit v1.2.3 From 76f258bf3f2aae570209319944703a92ac64e29e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Sep 2024 14:11:25 +0206 Subject: printk: nbcon: Introduce printer kthreads Provide the main implementation for running a printer kthread per nbcon console that is takeover/handover aware. This includes: - new mandatory write_thread() callback - kthread creation - kthread main printing loop - kthread wakeup mechanism - kthread shutdown kthread creation is a bit tricky because consoles may register before kthreads can be created. In such cases, registration will succeed, even though no kthread exists. Once kthreads can be created, an early_initcall will set @printk_kthreads_ready. If there are no registered boot consoles, the early_initcall creates the kthreads for all registered nbcon consoles. If kthread creation fails, the related console is unregistered. If there are registered boot consoles when @printk_kthreads_ready is set, no kthreads are created until the final boot console unregisters. Once kthread creation finally occurs, @printk_kthreads_running is set so that the system knows kthreads are available for all registered nbcon consoles. If @printk_kthreads_running is already set when the console is registering, the kthread is created during registration. If kthread creation fails, the registration will fail. Until @printk_kthreads_running is set, console printing occurs directly via the console_lock. kthread shutdown on system shutdown/reboot is necessary to ensure the printer kthreads finish their printing so that the system can cleanly transition back to direct printing via the console_lock in order to reliably push out the final shutdown/reboot messages. @printk_kthreads_running is cleared before shutting down the individual kthreads. The kthread uses a new mandatory write_thread() callback that is called with both device_lock() and the console context acquired. The console ownership handling is necessary for synchronization against write_atomic() which is synchronized only via the console context ownership. The device_lock() serializes acquiring the console context with NBCON_PRIO_NORMAL. It is needed in case the device_lock() does not disable preemption. It prevents the following race: CPU0 CPU1 [ task A ] nbcon_context_try_acquire() # success with NORMAL prio # .unsafe == false; // safe for takeover [ schedule: task A -> B ] WARN_ON() nbcon_atomic_flush_pending() nbcon_context_try_acquire() # success with EMERGENCY prio # flushing nbcon_context_release() # HERE: con->nbcon_state is free # to take by anyone !!! 
nbcon_context_try_acquire() # success with NORMAL prio [ task B ] [ schedule: task B -> A ] nbcon_enter_unsafe() nbcon_context_can_proceed() BUG: nbcon_context_can_proceed() returns "true" because the console is owned by a context on CPU0 with NBCON_PRIO_NORMAL. But it should return "false". The console is owned by a context from task B and we do the check in a context from task A. Note that with these changes, the printer kthreads do not yet take over full responsibility for nbcon printing during normal operation. These changes only focus on the lifecycle of the kthreads. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-7-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 88050d30a9cc..788ce9c829f6 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,7 +16,9 @@ #include #include +#include #include +#include #include #include @@ -324,6 +326,9 @@ struct nbcon_write_context { * @nbcon_seq: Sequence number of the next record for nbcon to print * @nbcon_device_ctxt: Context available for non-printing operations * @pbufs: Pointer to nbcon private buffer + * @kthread: Printer kthread for this console + * @rcuwait: RCU-safe wait object for @kthread waking + * @irq_work: Defer @kthread waking to IRQ work context */ struct console { char name[16]; @@ -377,6 +382,37 @@ struct console { */ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + /** + * @write_thread: + * + * NBCON callback to write out text in task context. + * + * This callback must be called only in task context with both + * device_lock() and the nbcon console acquired with + * NBCON_PRIO_NORMAL. + * + * The same rules for console ownership verification and unsafe + * sections handling applies as with write_atomic(). + * + * The console ownership handling is necessary for synchronization + * against write_atomic() which is synchronized only via the context. + * + * The device_lock() provides the primary serialization for operations + * on the device. It might be as relaxed (mutex)[*] or as tight + * (disabled preemption and interrupts) as needed. It allows + * the kthread to operate in the least restrictive mode[**]. + * + * [*] Standalone nbcon_context_try_acquire() is not safe with + * the preemption enabled, see nbcon_owner_matches(). But it + * can be safe when always called in the preemptive context + * under the device_lock(). + * + * [**] The device_lock() makes sure that nbcon_context_try_acquire() + * would never need to spin which is important especially with + * PREEMPT_RT. 
+ */ + void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt); + /** * @device_lock: * @@ -423,7 +459,11 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct nbcon_context __private nbcon_device_ctxt; + struct printk_buffers *pbufs; + struct task_struct *kthread; + struct rcuwait rcuwait; + struct irq_work irq_work; }; #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 5102981d5e2a5a7a12424b5d26c7524ac2899898 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:30 +0206 Subject: printk: nbcon: Show replay message on takeover An emergency or panic context can takeover console ownership while the current owner was printing a printk message. The atomic printer will re-print the message that the previous owner was printing. However, this can look confusing to the user and may even seem as though a message was lost. [3430014.1 [3430014.181123] usb 1-2: Product: USB Audio Add a new field @nbcon_prev_seq to struct console to track the sequence number to print that was assigned to the previous console owner. If this matches the sequence number to print that the current owner is assigned, then a takeover must have occurred. In this case, print an additional message to inform the user that the previous message is being printed again. [3430014.1 ** replaying previous printk message ** [3430014.181123] usb 1-2: Product: USB Audio Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 788ce9c829f6..eba367bf605d 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -325,6 +325,7 @@ struct nbcon_write_context { * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @nbcon_device_ctxt: Context available for non-printing operations + * @nbcon_prev_seq: Seq num the previous nbcon owner was assigned to print * @pbufs: Pointer to nbcon private buffer * @kthread: Printer kthread for this console * @rcuwait: RCU-safe wait object for @kthread waking @@ -459,6 +460,7 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct nbcon_context __private nbcon_device_ctxt; + atomic_long_t __private nbcon_prev_seq; struct printk_buffers *pbufs; struct task_struct *kthread; -- cgit v1.2.3 From a4d398a573d0f0c98c39d4af1866a3edaec4959c Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Mon, 2 Sep 2024 07:43:23 +0100 Subject: ARM: 9416/1: amba: make amba_bustype constant Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the amba_bustype variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. 
Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Signed-off-by: Russell King (Oracle) --- include/linux/amba/bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 958a55bcc708..dda2f3ea89cb 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -105,7 +105,7 @@ enum amba_vendor { AMBA_VENDOR_LSI = 0xb6, }; -extern struct bus_type amba_bustype; +extern const struct bus_type amba_bustype; #define to_amba_device(d) container_of_const(d, struct amba_device, dev) -- cgit v1.2.3 From 593377036e50de89132bc1222800174fde0780ec Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Fri, 10 May 2024 23:05:56 -0300 Subject: kvm: Note an RCU quiescent state on guest exit As of today, KVM notes a quiescent state only in guest entry, which is good as it avoids the guest being interrupted for current RCU operations. While the guest vcpu runs, it can be interrupted by a timer IRQ that will check for any RCU operations waiting for this CPU. In case there are any of such, it invokes rcu_core() in order to sched-out the current thread and note a quiescent state. This occasional schedule work will introduce tens of microsseconds of latency, which is really bad for vcpus running latency-sensitive applications, such as real-time workloads. So, note a quiescent state in guest exit, so the interrupted guests is able to deal with any pending RCU operations before being required to invoke rcu_core(), and thus avoid the overhead of related scheduler work. Signed-off-by: Leonardo Bras Acked-by: Paul E. McKenney Acked-by: Sean Christopherson Message-ID: <20240511020557.1198200-1-leobras@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 6 ++++-- include/linux/kvm_host.h | 10 +++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 6e76b9dba00e..8a78fabeafc3 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -80,10 +80,12 @@ static __always_inline bool context_tracking_guest_enter(void) return context_tracking_enabled_this_cpu(); } -static __always_inline void context_tracking_guest_exit(void) +static __always_inline bool context_tracking_guest_exit(void) { if (context_tracking_enabled()) __ct_user_exit(CONTEXT_GUEST); + + return context_tracking_enabled_this_cpu(); } #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) @@ -98,7 +100,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) { } static inline int ct_state(void) { return -1; } static inline int __ct_state(void) { return -1; } static __always_inline bool context_tracking_guest_enter(void) { return false; } -static __always_inline void context_tracking_guest_exit(void) { } +static __always_inline bool context_tracking_guest_exit(void) { return false; } #define CT_WARN_ON(cond) do { } while (0) #endif /* !CONFIG_CONTEXT_TRACKING_USER */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b23c6d48392f..0d5125a3e31a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -485,7 +485,15 @@ static __always_inline void guest_state_enter_irqoff(void) */ static __always_inline void guest_context_exit_irqoff(void) { - context_tracking_guest_exit(); + /* + * Guest mode is treated as a quiescent state, see + * guest_context_enter_irqoff() for more details. 
+ */ + if (!context_tracking_guest_exit()) { + instrumentation_begin(); + rcu_virt_note_context_switch(); + instrumentation_end(); + } } /* -- cgit v1.2.3 From 2097154a10c6ee78be8796411e5d0ad81ee06ed6 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 3 Sep 2024 17:16:12 +0200 Subject: namespace: introduce SB_I_NOIDMAP flag Right now we determine if filesystem support vfs idmappings or not basing on the FS_ALLOW_IDMAP flag presence. This "static" way works perfecly well for local filesystems like ext4, xfs, btrfs, etc. But for network-like filesystems like fuse, cephfs this approach is not ideal, because sometimes proper support of vfs idmaps requires some extensions for the on-wire protocol, which implies that changes have to be made not only in the Linux kernel code but also in the 3rd party components like libfuse, cephfs MDS server and so on. We have seen that issue during our work on cephfs idmapped mounts [1] with Christian, but right now I'm working on the idmapped mounts support for fuse/virtiofs and I think that it is a right time for this extension. [1] 5ccd8530dd7 ("ceph: handle idmapped mounts in create_request_message()") Suggested-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..6ff547ef21f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1189,6 +1189,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ +#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ /* Possible states of 'frozen' field */ enum { -- cgit v1.2.3 From aa16880d9f13c6490e80ad614402c8a6fe6f3efa Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 3 Sep 2024 17:16:13 +0200 Subject: fuse: add basic infrastructure to support idmappings Add some preparational changes in fuse_get_req/fuse_force_creds to handle idmappings. Miklos suggested [1], [2] to change the meaning of in.h.uid/in.h.gid fields when daemon declares support for idmapped mounts. In a new semantic, we fill uid/gid values in fuse header with a id-mapped caller uid/gid (for requests which create new inodes), for all the rest cases we just send -1 to userspace. No functional changes intended. 
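A hedged sketch of the semantics described above, not the actual fuse code: the helper name, its shape, and the use of the connection's user namespace are assumptions of this illustration; only FUSE_INVALID_UIDGID and the "mapped ids only for inode-creating requests" rule come from this series.

  static void fuse_fill_header_creds(struct fuse_conn *fc, struct mnt_idmap *idmap,
                                     struct fuse_in_header *hdr, bool creates_inode)
  {
          if (creates_inode) {
                  /* id-mapped caller credentials for inode-creating requests */
                  hdr->uid = from_kuid(fc->user_ns, mapped_fsuid(idmap, fc->user_ns));
                  hdr->gid = from_kgid(fc->user_ns, mapped_fsgid(idmap, fc->user_ns));
          } else {
                  /* everything else reports -1 once idmap support is negotiated */
                  hdr->uid = FUSE_INVALID_UIDGID;
                  hdr->gid = FUSE_INVALID_UIDGID;
          }
  }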
Link: https://lore.kernel.org/all/CAJfpegsVY97_5mHSc06mSw79FehFWtoXT=hhTUK_E-Yhr7OAuQ@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/CAJfpegtHQsEUuFq1k4ZbTD3E1h-GsrN3PWyv7X8cg6sfU_W2Yw@mail.gmail.com/ [2] Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d08b99d60f6f..2ccf38181df2 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -984,6 +984,8 @@ struct fuse_fallocate_in { */ #define FUSE_UNIQUE_RESEND (1ULL << 63) +#define FUSE_INVALID_UIDGID ((uint32_t)(-1)) + struct fuse_in_header { uint32_t len; uint32_t opcode; -- cgit v1.2.3 From 16e1503eaf329129170e4e7a078aee17686967a5 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 3 Sep 2024 17:16:25 +0200 Subject: fuse: allow idmapped mounts Now we have everything in place and we can allow idmapped mounts by setting the FS_ALLOW_IDMAP flag. Notice that real availability of idmapped mounts will depend on the fuse daemon. Fuse daemon have to set FUSE_ALLOW_IDMAP flag in the FUSE_INIT reply. To discuss: - we enable idmapped mounts support only if "default_permissions" mode is enabled, because otherwise we would need to deal with UID/GID mappings in the userspace side OR provide the userspace with idmapped req->in.h.uid/req->in.h.gid values which is not something that we probably want to. Idmapped mounts philosophy is not about faking caller uid/gid. Some extra links and examples: - libfuse support https://github.com/mihalicyn/libfuse/commits/idmap_support - fuse-overlayfs support: https://github.com/mihalicyn/fuse-overlayfs/commits/idmap_support - cephfs-fuse conversion example https://github.com/mihalicyn/ceph/commits/fuse_idmap - glusterfs conversion example https://github.com/mihalicyn/glusterfs/commits/fuse_idmap Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 2ccf38181df2..f1e99458e29e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -217,6 +217,9 @@ * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag * - add FUSE_NO_EXPORT_SUPPORT init flag * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag + * + * 7.41 + * - add FUSE_ALLOW_IDMAP */ #ifndef _LINUX_FUSE_H @@ -252,7 +255,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 40 +#define FUSE_KERNEL_MINOR_VERSION 41 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -421,6 +424,7 @@ struct fuse_file_lock { * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit * of the request ID indicates resend requests + * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -466,6 +470,7 @@ struct fuse_file_lock { /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP +#define FUSE_ALLOW_IDMAP (1ULL << 40) /** * CUSE INIT request/reply flags @@ -984,6 +989,19 @@ struct fuse_fallocate_in { */ #define FUSE_UNIQUE_RESEND (1ULL << 63) +/** + * This value will be set by the kernel to + * (struct fuse_in_header).{uid,gid} 
fields in + * case when: + * - fuse daemon enabled FUSE_ALLOW_IDMAP + * - idmapping information is not available and uid/gid + * can not be mapped in accordance with an idmapping. + * + * Note: an idmapping information always available + * for inode creation operations like: + * FUSE_MKNOD, FUSE_SYMLINK, FUSE_MKDIR, FUSE_TMPFILE, + * FUSE_CREATE and FUSE_RENAME2 (with RENAME_WHITEOUT). + */ #define FUSE_INVALID_UIDGID ((uint32_t)(-1)) struct fuse_in_header { -- cgit v1.2.3 From 071f24ad28cdcdfa544fdb79b1b1d2b423717a11 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:54 -0700 Subject: KVM: Rename arch hooks related to per-CPU virtualization enabling Rename the per-CPU hooks used to enable virtualization in hardware to align with the KVM-wide helpers in kvm_main.c, and to better capture that the callbacks are invoked on every online CPU. No functional change intended. Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Message-ID: <20240830043600.127750-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b23c6d48392f..4ceecba64f93 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1521,8 +1521,8 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -int kvm_arch_hardware_enable(void); -void kvm_arch_hardware_disable(void); +int kvm_arch_enable_virtualization_cpu(void); +void kvm_arch_disable_virtualization_cpu(void); #endif int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From b67107a251b01161956892352d53fb122346eda1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:58 -0700 Subject: KVM: Add arch hooks for enabling/disabling virtualization Add arch hooks that are invoked when KVM enables/disable virtualization. x86 will use the hooks to register an "emergency disable" callback, which is essentially an x86-specific shutdown notifier that is used when the kernel is doing an emergency reboot/shutdown/kexec. Add comments for the declarations to help arch code understand exactly when the callbacks are invoked. Alternatively, the APIs themselves could communicate most of the same info, but kvm_arch_pre_enable_virtualization() and kvm_arch_post_disable_virtualization() are a bit cumbersome, and make it a bit less obvious that they are intended to be implemented as a pair. Reviewed-by: Chao Gao Reviewed-by: Kai Huang Acked-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4ceecba64f93..dfd6ad66efcc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1521,6 +1521,20 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +/* + * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under + * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of + * kvm_usage_count, i.e. 
at the beginning of the generic hardware enabling + * sequence, and at the end of the generic hardware disabling sequence. + */ +void kvm_arch_enable_virtualization(void); +void kvm_arch_disable_virtualization(void); +/* + * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to + * do the actual twiddling of hardware bits. The hooks are called on all + * online CPUs when KVM enables/disabled virtualization, and on a single CPU + * when that CPU is onlined/offlined (including for Resume/Suspend). + */ int kvm_arch_enable_virtualization_cpu(void); void kvm_arch_disable_virtualization_cpu(void); #endif -- cgit v1.2.3 From 940ce73bdec5a020fec058ba947d2bf627462c53 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 4 Sep 2024 11:08:44 -0700 Subject: bpf: Remove the insn_buf array stack usage from the inline_bpf_loop() This patch removes the insn_buf array stack usage from the inline_bpf_loop(). Instead, the env->insn_buf is used. The usage in inline_bpf_loop() needs more than 16 insn, so the INSN_BUF_SIZE needs to be increased from 16 to 32. The compiler stack size warning on the verifier is gone after this change. Cc: Eduard Zingerman Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240904180847.56947-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2e20207315a9..8458632824a4 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -24,7 +24,7 @@ */ #define TMP_STR_BUF_LEN 320 /* Patch buffer size */ -#define INSN_BUF_SIZE 16 +#define INSN_BUF_SIZE 32 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that -- cgit v1.2.3 From a8532fac7b5d27b8d62008a89593dccb6f9786ef Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 30 Aug 2024 22:02:34 -1000 Subject: sched_ext: TASK_DEAD tasks must be switched into SCX on ops_enable During scx_ops_enable(), SCX needs to invoke the sleepable ops.init_task() on every task. To do this, it does get_task_struct() on each iterated task, drop the lock and then call ops.init_task(). However, a TASK_DEAD task may already have lost all its usage count and be waiting for RCU grace period to be freed. If get_task_struct() is called on such task, use-after-free can happen. To avoid such situations, scx_ops_enable() skips initialization of TASK_DEAD tasks, which seems safe as they are never going to be scheduled again. Unfortunately, a racing sched_setscheduler(2) can grab the task before the task is unhashed and then continue to e.g. move the task from RT to SCX after TASK_DEAD is set and ops_enable skipped the task. As the task hasn't gone through scx_ops_init_task(), scx_ops_enable_task() called from switching_to_scx() triggers the following warning: sched_ext: Invalid task state transition 0 -> 3 for stress-ng-race-[2872] WARNING: CPU: 6 PID: 2367 at kernel/sched/ext.c:3327 scx_ops_enable_task+0x18f/0x1f0 ... RIP: 0010:scx_ops_enable_task+0x18f/0x1f0 ... switching_to_scx+0x13/0xa0 __sched_setscheduler+0x84e/0xa50 do_sched_setscheduler+0x104/0x1c0 __x64_sys_sched_setscheduler+0x18/0x30 do_syscall_64+0x7b/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e As in the ops_disable path, it just doesn't seem like a good idea to leave any task in an inconsistent state, even when the task is dead. 
The root cause is ops_enable not being able to tell reliably whether a task is truly dead (no one else is looking at it and it's about to be freed) and was testing TASK_DEAD instead. Fix it by testing the task's usage count directly. - ops_init no longer ignores TASK_DEAD tasks. As now all users iterate all tasks, @include_dead is removed from scx_task_iter_next_locked() along with dead task filtering. - tryget_task_struct() is added. Tasks are skipped iff tryget_task_struct() fails. Signed-off-by: Tejun Heo Cc: David Vernet Cc: Peter Zijlstra --- include/linux/sched/task.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 4df2f9055587..0f2aeb37bbb0 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -120,6 +120,11 @@ static inline struct task_struct *get_task_struct(struct task_struct *t) return t; } +static inline struct task_struct *tryget_task_struct(struct task_struct *t) +{ + return refcount_inc_not_zero(&t->usage) ? t : NULL; +} + extern void __put_task_struct(struct task_struct *t); extern void __put_task_struct_rcu_cb(struct rcu_head *rhp); -- cgit v1.2.3 From 8195136669661fdfe54e9a8923c33b31c92fc1da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 4 Sep 2024 10:24:59 -1000 Subject: sched_ext: Add cgroup support Add sched_ext_ops operations to init/exit cgroups, and track task migrations and config changes. A BPF scheduler may not implement or implement only subset of cgroup features. The implemented features can be indicated using %SCX_OPS_HAS_CGOUP_* flags. If cgroup configuration makes use of features that are not implemented, a warning is triggered. While a BPF scheduler is being enabled and disabled, relevant cgroup operations are locked out using scx_cgroup_rwsem. This avoids situations like task prep taking place while the task is being moved across cgroups, making things easier for BPF schedulers. v7: - cgroup interface file visibility toggling is dropped in favor just warning messages. Dynamically changing interface visiblity caused more confusion than helping. v6: - Updated to reflect the removal of SCX_KF_SLEEPABLE. - Updated to use CONFIG_GROUP_SCHED_WEIGHT and fixes for !CONFIG_FAIR_GROUP_SCHED && CONFIG_EXT_GROUP_SCHED. v5: - Flipped the locking order between scx_cgroup_rwsem and cpus_read_lock() to avoid locking order conflict w/ cpuset. Better documentation around locking. - sched_move_task() takes an early exit if the source and destination are identical. This triggered the warning in scx_cgroup_can_attach() as it left p->scx.cgrp_moving_from uncleared. Updated the cgroup migration path so that ops.cgroup_prep_move() is skipped for identity migrations so that its invocations always match ops.cgroup_move() one-to-one. v4: - Example schedulers moved into their own patches. - Fix build failure when !CONFIG_CGROUP_SCHED, reported by Andrea Righi. v3: - Make scx_example_pair switch all tasks by default. - Convert to BPF inline iterators. - scx_bpf_task_cgroup() is added to determine the current cgroup from CPU controller's POV. This allows BPF schedulers to accurately track CPU cgroup membership. - scx_example_flatcg added. This demonstrates flattened hierarchy implementation of CPU cgroup control and shows significant performance improvement when cgroups which are nested multiple levels are under competition. v2: - Build fixes for different CONFIG combinations. 
Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden Reported-by: kernel test robot Cc: Andrea Righi --- include/linux/sched/ext.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index db2a266113ac..dc646cca4fcf 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -188,6 +188,9 @@ struct sched_ext_entity { bool disallow; /* reject switching into SCX */ /* cold fields */ +#ifdef CONFIG_EXT_GROUP_SCHED + struct cgroup *cgrp_moving_from; +#endif /* must be the last field, see init_scx_entity() */ struct list_head tasks_node; }; -- cgit v1.2.3 From 4e893545ef8712d25f3176790ebb95beb073637e Mon Sep 17 00:00:00 2001 From: Mariusz Tkaczyk Date: Wed, 4 Sep 2024 12:48:47 +0200 Subject: PCI/NPEM: Add Native PCIe Enclosure Management support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Native PCIe Enclosure Management (NPEM, PCIe r6.1 sec 6.28) allows managing LEDs in storage enclosures. NPEM is indication oriented and it does not give direct access to LEDs. Although each indication *could* represent an individual LED, multiple indications could also be represented as a single, multi-color LED or a single LED blinking in a specific interval. The specification leaves that open. Each enabled indication (capability register bit on) is represented as a ledclass_dev which can be controlled through sysfs. For every ledclass device only 2 brightness states are allowed: LED_ON (1) or LED_OFF (0). This corresponds to the NPEM control register (Indication bit on/off). Ledclass devices appear in sysfs as child devices (subdirectory) of PCI device which has an NPEM Extended Capability and indication is enabled in NPEM capability register. For example, these are LEDs created for pcieport "10000:02:05.0" on my setup: leds/ ├── 10000:02:05.0:enclosure:fail ├── 10000:02:05.0:enclosure:locate ├── 10000:02:05.0:enclosure:ok └── 10000:02:05.0:enclosure:rebuild They can be also found in "/sys/class/leds" directory. The parent PCIe device domain/bus/device/function address is used to guarantee uniqueness across leds subsystem. To enable/disable a "fail" indication, the "brightness" file can be edited: echo 1 > ./leds/10000:02:05.0:enclosure:fail/brightness echo 0 > ./leds/10000:02:05.0:enclosure:fail/brightness PCIe r6.1, sec 7.9.19.2 defines the possible indications. Multiple indications for same parent PCIe device can conflict and hardware may update them when processing new request. To avoid issues, driver refresh all indications by reading back control register. This driver expects to be the exclusive NPEM extended capability manager. It waits up to 1 second after imposing new request, it doesn't verify if controller is busy before write, and it assumes the mutex lock gives protection from concurrent updates. If _DSM LED management is available, we assume the platform may be using NPEM for its own purposes (see PCI Firmware Spec r3.3 sec 4.7), so the driver does not use NPEM. A future patch will add _DSM support; an info message notes whether NPEM or _DSM is being used. NPEM is a PCIe extended capability so it should be registered in pcie_init_capabilities() but it is not possible due to LED dependency. The parent pci_device must be added earlier for led_classdev_register() to be successful. NPEM does not require configuration on kernel side, so it is safe to register LED devices later. 
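A hedged sketch, not the driver itself, of what driving an indication through the new register definitions looks like; the completion polling and locking described above are omitted, and the function name is illustrative:

  static int npem_locate_on(struct pci_dev *pdev)
  {
          u16 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_NPEM);
          u32 ctrl;

          if (!pos)
                  return -ENODEV;

          /* Enable NPEM and request the Locate indication in one write. */
          pci_read_config_dword(pdev, pos + PCI_NPEM_CTRL, &ctrl);
          ctrl |= PCI_NPEM_CTRL_ENABLE | PCI_NPEM_IND_LOCATE;
          return pci_write_config_dword(pdev, pos + PCI_NPEM_CTRL, ctrl);
  }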
Link: https://lore.kernel.org/r/20240904104848.23480-3-mariusz.tkaczyk@linux.intel.com Suggested-by: Lukas Wunner Signed-off-by: Mariusz Tkaczyk Signed-off-by: Bjorn Helgaas Tested-by: Stuart Hayes Reviewed-by: Christoph Hellwig Reviewed-by: Ilpo Järvinen --- include/linux/pci.h | 3 +++ include/uapi/linux/pci_regs.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..c9db853e269f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -516,6 +516,9 @@ struct pci_dev { #endif #ifdef CONFIG_PCI_DOE struct xarray doe_mbs; /* Data Object Exchange mailboxes */ +#endif +#ifdef CONFIG_PCI_NPEM + struct npem *npem; /* Native PCIe Enclosure Management */ #endif u16 acs_cap; /* ACS Capability offset */ phys_addr_t rom; /* Physical address if not from BAR */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 94c00996e633..9751b413f3d6 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -740,6 +740,7 @@ #define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ +#define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ #define PCI_EXT_CAP_ID_PL_32GT 0x2A /* Physical Layer 32.0 GT/s */ #define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE @@ -1121,6 +1122,40 @@ #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0 #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4 +/* Native PCIe Enclosure Management */ +#define PCI_NPEM_CAP 0x04 /* NPEM capability register */ +#define PCI_NPEM_CAP_CAPABLE 0x00000001 /* NPEM Capable */ + +#define PCI_NPEM_CTRL 0x08 /* NPEM control register */ +#define PCI_NPEM_CTRL_ENABLE 0x00000001 /* NPEM Enable */ + +/* + * Native PCIe Enclosure Management indication bits and Reset command bit + * are corresponding for capability and control registers. 
+ */ +#define PCI_NPEM_CMD_RESET 0x00000002 /* Reset Command */ +#define PCI_NPEM_IND_OK 0x00000004 /* OK */ +#define PCI_NPEM_IND_LOCATE 0x00000008 /* Locate */ +#define PCI_NPEM_IND_FAIL 0x00000010 /* Fail */ +#define PCI_NPEM_IND_REBUILD 0x00000020 /* Rebuild */ +#define PCI_NPEM_IND_PFA 0x00000040 /* Predicted Failure Analysis */ +#define PCI_NPEM_IND_HOTSPARE 0x00000080 /* Hot Spare */ +#define PCI_NPEM_IND_ICA 0x00000100 /* In Critical Array */ +#define PCI_NPEM_IND_IFA 0x00000200 /* In Failed Array */ +#define PCI_NPEM_IND_IDT 0x00000400 /* Device Type */ +#define PCI_NPEM_IND_DISABLED 0x00000800 /* Disabled */ +#define PCI_NPEM_IND_SPEC_0 0x01000000 +#define PCI_NPEM_IND_SPEC_1 0x02000000 +#define PCI_NPEM_IND_SPEC_2 0x04000000 +#define PCI_NPEM_IND_SPEC_3 0x08000000 +#define PCI_NPEM_IND_SPEC_4 0x10000000 +#define PCI_NPEM_IND_SPEC_5 0x20000000 +#define PCI_NPEM_IND_SPEC_6 0x40000000 +#define PCI_NPEM_IND_SPEC_7 0x80000000 + +#define PCI_NPEM_STATUS 0x0c /* NPEM status register */ +#define PCI_NPEM_STATUS_CC 0x00000001 /* Command Completed */ + /* Data Object Exchange */ #define PCI_DOE_CAP 0x04 /* DOE Capabilities Register */ #define PCI_DOE_CAP_INT_SUP 0x00000001 /* Interrupt Support */ -- cgit v1.2.3 From 1705341485ff1eec097dfa26891d03afe5907e16 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Sun, 1 Sep 2024 20:45:34 -0700 Subject: net: mana: Improve mana_set_channels() in low mem conditions The mana_set_channels() function requires detaching the mana driver and reattaching it with changed channel values. During this operation if the system is low on memory, the reattach might fail, causing the network device being down. To avoid this we pre-allocate buffers at the beginning of set operation, to prevent complete network loss Signed-off-by: Shradha Gupta Reviewed-by: Gerhard Engleder Reviewed-by: Haiyang Zhang Link: https://patch.msgid.link/1725248734-21760-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Jakub Kicinski --- include/net/mana/mana.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 1f869624811d..dfbf78d4e557 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -488,7 +488,7 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); void mana_query_gf_stats(struct mana_port_context *apc); -int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu); +int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); extern const struct ethtool_ops mana_ethtool_ops; -- cgit v1.2.3 From 1083d733eb2624837753046924a9248555a1bfbf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 3 Sep 2024 16:35:54 +0300 Subject: ipv4: Fix user space build failure due to header change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RT_TOS() from include/uapi/linux/in_route.h is defined using IPTOS_TOS_MASK from include/uapi/linux/ip.h. 
This is problematic for files such as include/net/ip_fib.h that want to use RT_TOS() as without including both header files kernel compilation fails: In file included from ./include/net/ip_fib.h:25, from ./include/net/route.h:27, from ./include/net/lwtunnel.h:9, from net/core/dst.c:24: ./include/net/ip_fib.h: In function ‘fib_dscp_masked_match’: ./include/uapi/linux/in_route.h:31:32: error: ‘IPTOS_TOS_MASK’ undeclared (first use in this function) 31 | #define RT_TOS(tos) ((tos)&IPTOS_TOS_MASK) | ^~~~~~~~~~~~~~ ./include/net/ip_fib.h:440:45: note: in expansion of macro ‘RT_TOS’ 440 | return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos)); Therefore, cited commit changed linux/in_route.h to include linux/ip.h. However, as reported by David, this breaks iproute2 compilation due overlapping definitions between linux/ip.h and /usr/include/netinet/ip.h: In file included from ../include/uapi/linux/in_route.h:5, from iproute.c:19: ../include/uapi/linux/ip.h:25:9: warning: "IPTOS_TOS" redefined 25 | #define IPTOS_TOS(tos) ((tos)&IPTOS_TOS_MASK) | ^~~~~~~~~ In file included from iproute.c:17: /usr/include/netinet/ip.h:222:9: note: this is the location of the previous definition 222 | #define IPTOS_TOS(tos) ((tos) & IPTOS_TOS_MASK) Fix by changing include/net/ip_fib.h to include linux/ip.h. Note that usage of RT_TOS() should not spread further in the kernel due to recent work in this area. Fixes: 1fa3314c14c6 ("ipv4: Centralize TOS matching") Reported-by: David Ahern Closes: https://lore.kernel.org/netdev/2f5146ff-507d-4cab-a195-b28c0c9e654e@kernel.org/ Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20240903133554.2807343-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 1 + include/uapi/linux/in_route.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 269ec10f63e4..967e4dc555fa 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -22,6 +22,7 @@ #include #include #include +#include #include struct fib_config { diff --git a/include/uapi/linux/in_route.h b/include/uapi/linux/in_route.h index 10bdd7e7107f..0cc2c23b47f8 100644 --- a/include/uapi/linux/in_route.h +++ b/include/uapi/linux/in_route.h @@ -2,8 +2,6 @@ #ifndef _LINUX_IN_ROUTE_H #define _LINUX_IN_ROUTE_H -#include - /* IPv4 routing cache flags */ #define RTCF_DEAD RTNH_F_DEAD -- cgit v1.2.3 From 8f52de0077ba3bf41e5d53d67a185700f41efce7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jun 2024 11:33:01 +0100 Subject: netfs: Reduce number of conditional branches in netfs_perform_write() Reduce the number of conditional branches in netfs_perform_write() by merging in netfs_how_to_modify() and then creating a separate if-statement for each way we might modify a folio. Note that this means replicating the data copy in each path. 
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-6-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 606b4a0f92da..a4fd5dea52f4 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -129,7 +129,6 @@ E_(netfs_sreq_trace_put_terminated, "PUT TERM ") #define netfs_folio_traces \ - /* The first few correspond to enum netfs_how_to_modify */ \ EM(netfs_folio_is_uptodate, "mod-uptodate") \ EM(netfs_just_prefetch, "mod-prefetch") \ EM(netfs_whole_folio_modify, "mod-whole-f") \ @@ -139,7 +138,6 @@ EM(netfs_flush_content, "flush") \ EM(netfs_streaming_filled_page, "mod-streamw-f") \ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ - /* The rest are for writeback */ \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \ -- cgit v1.2.3 From 73425800ac948cb78063b897a0c345d788f3594d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jun 2024 11:26:24 +0100 Subject: netfs, cifs: Move CIFS_INO_MODIFIED_ATTR to netfs_inode Move CIFS_INO_MODIFIED_ATTR to netfs_inode as NETFS_ICTX_MODIFIED_ATTR and then make netfs_perform_write() set it. This means that cifs doesn't need to implement the ->post_modify() hook. Signed-off-by: David Howells cc: Jeff Layton cc: Steve French cc: Paulo Alcantara cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-7-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c47443e7a97e..71c748c53a1f 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -73,6 +73,7 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ +#define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */ }; /* -- cgit v1.2.3 From 52d55922e0f1db1f580c9f91c174d2392bfad481 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Jun 2024 09:02:58 +0100 Subject: netfs: Move max_len/max_nr_segs from netfs_io_subrequest to netfs_io_stream Move max_len/max_nr_segs from struct netfs_io_subrequest to struct netfs_io_stream as we only issue one subreq at a time and then don't need these values again for that subreq unless and until we have to retry it - in which case we want to renegotiate them. 
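A minimal sketch of the adjustment a filesystem makes after this change, assuming a prepare-write style hook; the io_streams[] indexing and the 256 KiB cap are assumptions of this sketch, while the stream fields come from the hunk below:

  static void myfs_prepare_write(struct netfs_io_subrequest *subreq)
  {
          struct netfs_io_stream *stream =
                  &subreq->rreq->io_streams[subreq->stream_nr];

          /* Limits now live on the stream and are renegotiated on retry. */
          stream->sreq_max_len  = 256 * 1024;     /* e.g. the server's wsize */
          stream->sreq_max_segs = 0;              /* no iterator segment limit */
  }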
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-8-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 71c748c53a1f..eea5ea2842f6 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -134,6 +134,8 @@ static inline struct netfs_group *netfs_folio_group(struct folio *folio) struct netfs_io_stream { /* Submission tracking */ struct netfs_io_subrequest *construct; /* Op being constructed */ + size_t sreq_max_len; /* Maximum size of a subrequest */ + unsigned int sreq_max_segs; /* 0 or max number of segments in an iterator */ unsigned int submit_off; /* Folio offset we're submitting from */ unsigned int submit_len; /* Amount of data left to submit */ unsigned int submit_max_len; /* Amount I/O can be rounded up to */ @@ -177,14 +179,12 @@ struct netfs_io_subrequest { struct list_head rreq_link; /* Link in rreq->subrequests */ struct iov_iter io_iter; /* Iterator for this subrequest */ unsigned long long start; /* Where to start the I/O */ - size_t max_len; /* Maximum size of the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ unsigned int nr_segs; /* Number of segs in io_iter */ - unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* I/O stream this belongs to */ unsigned long flags; -- cgit v1.2.3 From 51e9a86a4f75c6dc8531407b2ab2ccc4ad137e5a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jul 2024 09:45:46 +0100 Subject: netfs: Reserve netfs_sreq_source 0 as unset/unknown Reserve the 0-valued netfs_sreq_source to mean unset or unknown so that it can be seen in the trace as such rather than appearing as download-from-server when it's going to get switched to something else. 
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-9-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 + include/trace/events/netfs.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index eea5ea2842f6..37f8ce9c0284 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -43,6 +43,7 @@ static inline void folio_start_private_2(struct folio *folio) #define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ enum netfs_io_source { + NETFS_SOURCE_UNKNOWN, NETFS_FILL_WITH_ZEROES, NETFS_DOWNLOAD_FROM_SERVER, NETFS_READ_FROM_CACHE, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index a4fd5dea52f4..f4105b8e5894 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -60,6 +60,7 @@ E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ + EM(NETFS_SOURCE_UNKNOWN, "----") \ EM(NETFS_FILL_WITH_ZEROES, "ZERO") \ EM(NETFS_DOWNLOAD_FROM_SERVER, "DOWN") \ EM(NETFS_READ_FROM_CACHE, "READ") \ -- cgit v1.2.3 From c57de2a9259d7dc18a7a425fca91c77502263d8a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jul 2024 09:43:38 +0100 Subject: netfs: Remove NETFS_COPY_TO_CACHE Remove NETFS_COPY_TO_CACHE as it isn't used anymore. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-10-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 3 +-- include/trace/events/netfs.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 37f8ce9c0284..63ab2c775a21 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -207,11 +207,10 @@ enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ - NETFS_COPY_TO_CACHE, /* This write is to copy a read to the cache */ + NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ - NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_DIO_WRITE, /* This is a direct I/O write */ nr__netfs_io_origin } __mode(byte); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f4105b8e5894..47cd11aaccac 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -34,11 +34,10 @@ EM(NETFS_READAHEAD, "RA") \ EM(NETFS_READPAGE, "RP") \ EM(NETFS_READ_FOR_WRITE, "RW") \ - EM(NETFS_COPY_TO_CACHE, "CC") \ + EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITETHROUGH, "WT") \ EM(NETFS_UNBUFFERED_WRITE, "UW") \ - EM(NETFS_DIO_READ, "DR") \ E_(NETFS_DIO_WRITE, "DW") #define netfs_rreq_traces \ -- cgit v1.2.3 From 1c3e34bf8802b8b17d0c2067c43bb49b7e83885c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 5 Jul 2024 23:14:51 +0200 Subject: pwm: Make info in traces about affected pwm more useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hashed pointer isn't useful to identify the pwm device. 
Instead store and emit chipid and hwpwm. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240705211452.1157967-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/trace/events/pwm.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/pwm.h b/include/trace/events/pwm.h index 12b35e4ff917..8022701c446d 100644 --- a/include/trace/events/pwm.h +++ b/include/trace/events/pwm.h @@ -15,7 +15,8 @@ DECLARE_EVENT_CLASS(pwm, TP_ARGS(pwm, state, err), TP_STRUCT__entry( - __field(struct pwm_device *, pwm) + __field(unsigned int, chipid) + __field(unsigned int, hwpwm) __field(u64, period) __field(u64, duty_cycle) __field(enum pwm_polarity, polarity) @@ -24,7 +25,8 @@ DECLARE_EVENT_CLASS(pwm, ), TP_fast_assign( - __entry->pwm = pwm; + __entry->chipid = pwm->chip->id; + __entry->hwpwm = pwm->hwpwm; __entry->period = state->period; __entry->duty_cycle = state->duty_cycle; __entry->polarity = state->polarity; @@ -32,8 +34,8 @@ DECLARE_EVENT_CLASS(pwm, __entry->err = err; ), - TP_printk("%p: period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", - __entry->pwm, __entry->period, __entry->duty_cycle, + TP_printk("pwmchip%u.%u: period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", + __entry->chipid, __entry->hwpwm, __entry->period, __entry->duty_cycle, __entry->polarity, __entry->enabled, __entry->err) ); -- cgit v1.2.3 From f9ecc2febf6fd6ad53208a1c0e1b5066ee65dd8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 12 Jul 2024 19:18:20 +0200 Subject: pwm: Don't export pwm_capture() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is only a single caller of this function, and that's in drivers/pwm/core.c itself. So don't export the function. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240712171821.1470833-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index f8c2dc12dbd3..8acd60b53f58 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -394,9 +394,6 @@ static inline bool pwm_might_sleep(struct pwm_device *pwm) } /* PWM provider APIs */ -int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, - unsigned long timeout); - void pwmchip_put(struct pwm_chip *chip); struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); struct pwm_chip *devm_pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); @@ -462,13 +459,6 @@ static inline void pwm_disable(struct pwm_device *pwm) might_sleep(); } -static inline int pwm_capture(struct pwm_device *pwm, - struct pwm_capture *result, - unsigned long timeout) -{ - return -EINVAL; -} - static inline void pwmchip_put(struct pwm_chip *chip) { } -- cgit v1.2.3 From b4fef22c2fb97fa204f0c99c7c7f1c6b422ef0aa Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 28 Aug 2024 20:19:42 +1000 Subject: uapi: explain how per-syscall AT_* flags should be allocated Unfortunately, the way we have gone about adding new AT_* flags has been a little messy. In the beginning, all of the AT_* flags had generic meanings and so it made sense to share the flag bits indiscriminately. However, we inevitably ran into syscalls that needed their own syscall-specific flags. 
Due to the lack of a planned-out policy, we ended up with the following situations: * Existing syscalls adding new features tended to use new AT_* bits, with some effort taken to try to re-use bits for flags that were so obviously syscall-specific that they only make sense for a single syscall (such as the AT_EACCESS/AT_REMOVEDIR/AT_HANDLE_FID triplet). Given the constraints of bitflags, this works well in practice, but ideally (to avoid future confusion) we would plan ahead and define a set of "per-syscall bits" ahead of time so that when allocating new bits we don't end up with a complete mish-mash of which bits are supposed to be per-syscall and which aren't. * New syscalls dealt with this in several ways: - Some syscalls (like renameat2(2), move_mount(2), fsopen(2), and fspick(2)) created their own separate flag spaces that have no overlap with the AT_* flags. Most of these ended up allocating their bits sequentially. In the case of move_mount(2) and fspick(2), several flags have identical meanings to AT_* flags but were allocated in their own flag space. This makes sense for syscalls that will never share AT_* flags, but for some syscalls this leads to duplication with AT_* flags in a way that could cause confusion (if renameat2(2) grew a RENAME_EMPTY_PATH it seems likely that users could mistake it for AT_EMPTY_PATH since it is an *at(2) syscall). - Some syscalls unfortunately ended up both creating their own flag space and using bits from other flag spaces. The most obvious example is open_tree(2), where the standard usage ends up using flags from *THREE* separate flag spaces: open_tree(AT_FDCWD, "/foo", OPEN_TREE_CLONE|O_CLOEXEC|AT_RECURSIVE); (Note that O_CLOEXEC is also platform-specific, so several future OPEN_TREE_* bits are also made unusable in one fell swoop.) It's not entirely clear to me what the "right" choice is for new syscalls. Just saying that all future VFS syscalls should use AT_* flags doesn't seem practical. openat2(2) has RESOLVE_* flags (many of which don't make much sense to burn generic AT_* flags for) and move_mount(2) has separate AT_*-like flags for both the source and target so separate flags are needed anyway (though it seems possible that renameat2(2) could grow *_EMPTY_PATH flags at some point, and it's a bit of a shame they can't be reused). But at least for syscalls that _do_ choose to use AT_* flags, we should explicitly state the policy that 0x2ff is currently intended for per-syscall flags and that new flags should err on the side of overlapping with existing flag bits (so we can extend the scope of generic flags in the future if necessary). And add AT_* aliases for the RENAME_* flags to further cement that renameat2(2) is an *at(2) flag, just with its own per-syscall flags.
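A hedged userspace illustration of the new aliases: AT_RENAME_NOREPLACE is defined below to match the legacy RENAME_NOREPLACE value, so either spelling can be passed to renameat2(2). The renameat2() wrapper needs a reasonably recent glibc; otherwise syscall(2) applies.

  #define _GNU_SOURCE
  #include <fcntl.h>
  #include <stdio.h>

  #ifndef AT_RENAME_NOREPLACE
  #define AT_RENAME_NOREPLACE 0x0001      /* value added by this patch */
  #endif

  int main(void)
  {
          /* Fail instead of silently replacing an existing "new". */
          if (renameat2(AT_FDCWD, "old", AT_FDCWD, "new", AT_RENAME_NOREPLACE) < 0)
                  perror("renameat2");
          return 0;
  }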
Suggested-by: Amir Goldstein Reviewed-by: Jeff Layton Reviewed-by: Josef Bacik Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/r/20240828-exportfs-u64-mount-id-v3-1-10c2c4c16708@cyphar.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 80 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index e55a3314bcb0..38a6d66d9e88 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -90,37 +90,69 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value for dirfd used to + indicate openat should use the + current working directory. */ + + +/* Generic flags for the *at(2) family of syscalls. */ + +/* Reserved for per-syscall flags 0xff. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic + links. */ +/* Reserved for per-syscall flags 0x200 */ +#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount + traversal. */ +#define AT_EMPTY_PATH 0x1000 /* Allow empty relative + pathname to operate on dirfd + directly. */ /* - * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is - * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to - * unlinkat. The two functions do completely different things and therefore, - * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to - * faccessat would be undefined behavior and thus treating it equivalent to - * AT_EACCESS is valid undefined behavior. + * These flags are currently statx(2)-specific, but they could be made generic + * in the future and so they should not be used for other per-syscall flags. */ -#define AT_FDCWD -100 /* Special value used to indicate - openat should use the current - working directory. */ -#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ + +/* + * Per-syscall flags for the *at(2) family of syscalls. + * + * These are flags that are so syscall-specific that a user passing these flags + * to the wrong syscall is so "clearly wrong" that we can safely call such + * usage "undefined behaviour". + * + * For example, the constants AT_REMOVEDIR and AT_EACCESS have the same value. + * AT_EACCESS is meaningful only to faccessat, while AT_REMOVEDIR is meaningful + * only to unlinkat. The two functions do completely different things and + * therefore, the flags can be allowed to overlap. For example, passing + * AT_REMOVEDIR to faccessat would be undefined behavior and thus treating it + * equivalent to AT_EACCESS is valid undefined behavior. + * + * Note for implementers: When picking a new per-syscall AT_* flag, try to + * reuse already existing flags first. This leaves us with as many unused bits + * as possible, so we can use them for generic bits in the future if necessary. + */ + +/* Flags for renameat2(2) (must match legacy RENAME_* flags). 
*/ +#define AT_RENAME_NOREPLACE 0x0001 +#define AT_RENAME_EXCHANGE 0x0002 +#define AT_RENAME_WHITEOUT 0x0004 + +/* Flag for faccessat(2). */ #define AT_EACCESS 0x200 /* Test access permitted for effective IDs, not real IDs. */ +/* Flag for unlinkat(2). */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ -#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ -#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ -#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ - -#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ -#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ -#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ -#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ - -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +/* Flags for name_to_handle_at(2). */ +#define AT_HANDLE_FID 0x200 /* File handle is needed to compare + object identity and may not be + usable with open_by_handle_at(2). */ -/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */ -#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to - compare object identity and may not - be usable to open_by_handle_at(2) */ #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 #endif -- cgit v1.2.3 From 4356d575ef0f39a3e8e0ce0c40d84ce900ac3b61 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 28 Aug 2024 20:19:43 +1000 Subject: fhandle: expose u64 mount id to name_to_handle_at(2) Now that we provide a unique 64-bit mount ID interface in statx(2), we can now provide a race-free way for name_to_handle_at(2) to provide a file handle and corresponding mount without needing to worry about racing with /proc/mountinfo parsing or having to open a file just to do statx(2). 
While this is not necessary if you are using AT_EMPTY_PATH and don't care about an extra statx(2) call, users that pass full paths into name_to_handle_at(2) need to know which mount the file handle comes from (to make sure they don't try to open_by_handle_at a file handle from a different filesystem) and switching to AT_EMPTY_PATH would require allocating a file for every name_to_handle_at(2) call, turning err = name_to_handle_at(-EBADF, "/foo/bar/baz", &handle, &mntid, AT_HANDLE_MNT_ID_UNIQUE); into int fd = openat(-EBADF, "/foo/bar/baz", O_PATH | O_CLOEXEC); err1 = name_to_handle_at(fd, "", &handle, &unused_mntid, AT_EMPTY_PATH); err2 = statx(fd, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &statxbuf); mntid = statxbuf.stx_mnt_id; close(fd); Reviewed-by: Jeff Layton Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/r/20240828-exportfs-u64-mount-id-v3-2-10c2c4c16708@cyphar.com Reviewed-by: Jan Kara Reviewed-by: Josef Bacik Signed-off-by: Christian Brauner --- include/linux/syscalls.h | 2 +- include/uapi/linux/fcntl.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4bcf6754738d..5758104921e6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -870,7 +870,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, #endif asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, - int __user *mnt_id, int flag); + void __user *mnt_id, int flag); asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 38a6d66d9e88..87e2dec79fea 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -152,6 +152,7 @@ #define AT_HANDLE_FID 0x200 /* File handle is needed to compare object identity and may not be usable with open_by_handle_at(2). */ +#define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 -- cgit v1.2.3 From 19156263cb1f24128a9ba6ef7340be5cbacc3d22 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 5 Sep 2024 10:14:05 +0300 Subject: dma-mapping: use IOMMU DMA calls for common alloc/free page calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Common alloc and free pages routines are called when IOMMU DMA is used, and internally they call into the DMA ops structure, which is not available for the default IOMMU. This patch adds the necessary checks to call the IOMMU DMA implementation directly. 
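For context, the path that used to crash is the generic page-allocation API that drivers call; a minimal sketch of such a caller (editorial, not from the patch; the device pointer and size are placeholders):

    struct page *page;
    dma_addr_t dma;

    /* On IOMMU-DMA devices without dma_ops this used to oops; the common
     * helper now checks use_dma_iommu(dev) and takes the dma-iommu path. */
    page = dma_alloc_pages(dev, PAGE_SIZE, &dma, DMA_BIDIRECTIONAL, GFP_KERNEL);
    if (!page)
        return -ENOMEM;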
It fixes the following crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000040 Mem abort info: ESR = 0x0000000096000006 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault Data abort info: ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00000000d20bb000 [0000000000000040] pgd=08000000d20c1003 , p4d=08000000d20c1003 , pud=08000000d20c2003, pmd=0000000000000000 Internal error: Oops: 0000000096000006 [#1] PREEMPT SMP Modules linked in: ipv6 hci_uart venus_core btqca v4l2_mem2mem btrtl qcom_spmi_adc5 sbs_battery btbcm qcom_vadc_common cros_ec_typec videobuf2_v4l2 leds_cros_ec cros_kbd_led_backlight cros_ec_chardev videodev elan_i2c videobuf2_common qcom_stats mc bluetooth coresight_stm stm_core ecdh_generic ecc pwrseq_core panel_edp icc_bwmon ath10k_snoc ath10k_core ath mac80211 phy_qcom_qmp_combo aux_bridge libarc4 coresight_replicator coresight_etm4x coresight_tmc coresight_funnel cfg80211 rfkill coresight qcom_wdt cbmem ramoops reed_solomon pwm_bl coreboot_table backlight crct10dif_ce CPU: 7 UID: 0 PID: 70 Comm: kworker/u32:4 Not tainted 6.11.0-rc6-next-20240903-00003-gdfc6015d0711 #660 Hardware name: Google Lazor Limozeen without Touchscreen (rev5 - rev8) (DT) Workqueue: events_unbound deferred_probe_work_func hub 2-1:1.0: 4 ports detected pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : dma_common_alloc_pages+0x54/0x1b4 lr : dma_common_alloc_pages+0x4c/0x1b4 sp : ffff8000807d3730 x29: ffff8000807d3730 x28: ffff02a7d312f880 x27: 0000000000000001 x26: 000000000000c000 x25: 0000000000000000 x24: 0000000000000001 x23: ffff02a7d23b6898 x22: 0000000000006cc0 x21: 000000000000c000 x20: ffff02a7858bf410 x19: fffffe0a60006000 x18: 0000000000000001 x17: 00000000000000d5 x16: 1fffe054f0bcc261 x15: 0000000000000001 x14: ffff02a7844dc680 x13: 0000000000100180 x12: dead000000000100 x11: dead000000000122 x10: 00000000001001ff x9 : ffff02a87f7b7b00 x8 : ffff02a87f7b7b00 x7 : ffff405977d6b000 x6 : ffff8000807d3310 x5 : ffff02a87f6b6398 x4 : 0000000000000001 x3 : ffff405977d6b000 x2 : ffff02a7844dc600 x1 : 0000000100000000 x0 : fffffe0a60006000 Call trace: dma_common_alloc_pages+0x54/0x1b4 __dma_alloc_pages+0x68/0x90 dma_alloc_pages+0x10/0x1c snd_dma_noncoherent_alloc+0x28/0x8c __snd_dma_alloc_pages+0x30/0x50 snd_dma_alloc_dir_pages+0x40/0x80 do_alloc_pages+0xb8/0x13c preallocate_pcm_pages+0x6c/0xf8 preallocate_pages+0x160/0x1a4 snd_pcm_set_managed_buffer_all+0x64/0xb0 lpass_platform_pcm_new+0xc0/0xe8 snd_soc_pcm_component_new+0x3c/0xc8 soc_new_pcm+0x4fc/0x668 snd_soc_bind_card+0xabc/0xbac snd_soc_register_card+0xf0/0x108 devm_snd_soc_register_card+0x4c/0xa4 sc7180_snd_platform_probe+0x180/0x224 platform_probe+0x68/0xc0 really_probe+0xbc/0x298 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x15c __device_attach_driver+0xb8/0x134 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x88/0xc0 process_one_work+0x14c/0x28c worker_thread+0x2cc/0x3d4 kthread+0x114/0x118 ret_from_fork+0x10/0x20 Code: f9411c19 940000c9 aa0003f3 b4000460 (f9402326) ---[ end trace 0000000000000000 ]--- Fixes: b5c58b2fdc42 ("dma-mapping: direct calls for dma-iommu") Closes: https://lore.kernel.org/all/10431dfd-ce04-4e0f-973b-c78477303c18@notapiano Reported-by: Nícolas F. R. A. 
Prado #KernelCI Signed-off-by: Leon Romanovsky Tested-by: Nícolas F. R. A. Prado Signed-off-by: Christoph Hellwig --- include/linux/iommu-dma.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index d30a58bf00fd..1bb55ca1ab79 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -10,6 +10,10 @@ #include #ifdef CONFIG_IOMMU_DMA +static inline bool use_dma_iommu(struct device *dev) +{ + return dev->dma_iommu; +} dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); @@ -49,6 +53,10 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir); #else +static inline bool use_dma_iommu(struct device *dev) +{ + return false; +} static inline dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) -- cgit v1.2.3 From 59da880afed211c989ef65da577b24215ce57774 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 3 Sep 2024 10:45:58 -0700 Subject: uprobes: get rid of enum uprobe_filter_ctx in uprobe filter callbacks It serves no purpose beyond adding an unnecessary argument passed to the filter callback. Just get rid of it, no one is actually using it. Signed-off-by: Andrii Nakryiko Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240903174603.3554182-4-andrii@kernel.org --- include/linux/uprobes.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index f50df1fa93e7..63ae2ade3487 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -28,20 +28,12 @@ struct page; #define MAX_URETPROBE_DEPTH 64 -enum uprobe_filter_ctx { - UPROBE_FILTER_REGISTER, - UPROBE_FILTER_UNREGISTER, - UPROBE_FILTER_MMAP, -}; - struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); int (*ret_handler)(struct uprobe_consumer *self, unsigned long func, struct pt_regs *regs); - bool (*filter)(struct uprobe_consumer *self, - enum uprobe_filter_ctx ctx, - struct mm_struct *mm); + bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); struct uprobe_consumer *next; }; -- cgit v1.2.3 From cc01bd044e6a521d2cd128f685ee8d23ef0067f2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 3 Sep 2024 10:45:59 -0700 Subject: uprobes: traverse uprobe's consumer list locklessly under SRCU protection uprobe->register_rwsem is one of a few big bottlenecks to scalability of uprobes, so we need to get rid of it to improve uprobe performance and multi-CPU scalability. First, we turn uprobe's consumer list into a typical doubly-linked list and utilize existing RCU-aware helpers for traversing such lists, as well as adding and removing elements from it. For entry uprobes we already have SRCU protection active since before uprobe lookup. For uretprobes we keep a refcount, guaranteeing that the uprobe won't go away from under us, but we add SRCU protection around consumer list traversal. 
Lastly, to keep handler_chain()'s UPROBE_HANDLER_REMOVE handling simple, we remember whether any removal was requested during handler calls, but then we double-check the decision under a proper register_rwsem using consumers' filter callbacks. Handler removal is very rare, so this extra lock won't hurt performance, overall, but we also avoid the need for any extra protection (e.g., seqcount locks). Signed-off-by: Andrii Nakryiko Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240903174603.3554182-5-andrii@kernel.org --- include/linux/uprobes.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 63ae2ade3487..f112b56e9479 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -29,13 +29,21 @@ struct page; #define MAX_URETPROBE_DEPTH 64 struct uprobe_consumer { + /* + * handler() can return UPROBE_HANDLER_REMOVE to signal the need to + * unregister uprobe for current process. If UPROBE_HANDLER_REMOVE is + * returned, filter() callback has to be implemented as well and it + * should return false to "confirm" the decision to uninstall uprobe + * for the current process. If filter() is omitted or returns true, + * UPROBE_HANDLER_REMOVE is effectively ignored. + */ int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); int (*ret_handler)(struct uprobe_consumer *self, unsigned long func, struct pt_regs *regs); bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); - struct uprobe_consumer *next; + struct list_head cons_node; }; #ifdef CONFIG_UPROBES -- cgit v1.2.3 From 04b01625da130c7521b768996cd5e48052198b97 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Sep 2024 10:46:00 -0700 Subject: perf/uprobe: split uprobe_unregister() With uprobe_unregister() having grown a synchronize_srcu(), it becomes fairly slow to call. Esp. since both users of this API call it in a loop. Peel off the sync_srcu() and do it once, after the loop. We also need to add uprobe_unregister_sync() into uprobe_register()'s error handling path, as we need to be careful about returning to the caller before we have a guarantee that partially attached consumer won't be called anymore. This is an unlikely slow path and this should be totally fine to be slow in the case of a failed attach. 
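The resulting unregister pattern for callers is roughly the following (an editorial sketch, not from the patch; the uprobes/consumers arrays and count are illustrative):

    /* cheap per-consumer teardown first ... */
    for (i = 0; i < cnt; i++)
        uprobe_unregister_nosync(uprobes[i], &consumers[i]);

    /* ... then pay the synchronize_srcu() cost only once for the whole batch */
    uprobe_unregister_sync();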
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: "Peter Zijlstra (Intel)" Co-developed-by: Andrii Nakryiko Signed-off-by: Andrii Nakryiko Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240903174603.3554182-6-andrii@kernel.org --- include/linux/uprobes.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index f112b56e9479..2b294bf1881f 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -115,7 +115,8 @@ extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); -extern void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc); +extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); +extern void uprobe_unregister_sync(void); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void uprobe_start_dup_mmap(void); @@ -164,7 +165,10 @@ uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add) return -ENOSYS; } static inline void -uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) +uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc) +{ +} +static inline void uprobe_unregister_sync(void) { } static inline int uprobe_mmap(struct vm_area_struct *vma) -- cgit v1.2.3 From 50a38035ed5ccc2ab8a28eaf70c3c7a87e060345 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Sep 2024 10:46:01 -0700 Subject: rbtree: provide rb_find_rcu() / rb_find_add_rcu() Much like latch_tree, add two RCU methods for the regular RB-tree, which can be used in conjunction with a seqcount to provide lockless lookups. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: "Peter Zijlstra (Intel)" Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Reviewed-by: "Masami Hiramatsu (Google)" Link: https://lore.kernel.org/r/20240903174603.3554182-7-andrii@kernel.org --- include/linux/rbtree.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index f7edca369eda..7c173aa64e1e 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -244,6 +244,42 @@ rb_find_add(struct rb_node *node, struct rb_root *tree, return NULL; } +/** + * rb_find_add_rcu() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Adds a Store-Release for link_node. + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. 
+ */ +static __always_inline struct rb_node * +rb_find_add_rcu(struct rb_node *node, struct rb_root *tree, + int (*cmp)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) + link = &parent->rb_left; + else if (c > 0) + link = &parent->rb_right; + else + return parent; + } + + rb_link_node_rcu(node, parent, link); + rb_insert_color(node, tree); + return NULL; +} + /** * rb_find() - find @key in tree @tree * @key: key to match @@ -272,6 +308,37 @@ rb_find(const void *key, const struct rb_root *tree, return NULL; } +/** + * rb_find_rcu() - find @key in tree @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining the node order + * + * Notably, tree descent vs concurrent tree rotations is unsound and can result + * in false-negatives. + * + * Returns the rb_node matching @key or NULL. + */ +static __always_inline struct rb_node * +rb_find_rcu(const void *key, const struct rb_root *tree, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node = tree->rb_node; + + while (node) { + int c = cmp(key, node); + + if (c < 0) + node = rcu_dereference_raw(node->rb_left); + else if (c > 0) + node = rcu_dereference_raw(node->rb_right); + else + return node; + } + + return NULL; +} + /** * rb_find_first() - find the first @key in @tree * @key: key to match -- cgit v1.2.3 From e04e2b760ddbe3d7b283a05898c3a029085cd8cd Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 1 Sep 2024 05:10:52 +0200 Subject: platform/x86: wmi: Pass event data directly to legacy notify handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current legacy WMI handlers are susceptible to picking up wrong WMI event data on systems where different WMI devices share some notification IDs. Prevent this by letting the WMI driver core taking care of retrieving the event data. This also simplifies the legacy WMI handlers and their implementation inside the WMI driver core. Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20240901031055.3030-3-W_Armin@gmx.de Signed-off-by: Hans de Goede --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0687a442fec7..eed105b1fbfb 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -386,7 +386,7 @@ extern bool acpi_is_pnp_device(struct acpi_device *); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) -typedef void (*wmi_notify_handler) (u32 value, void *context); +typedef void (*wmi_notify_handler) (union acpi_object *data, void *context); int wmi_instance_count(const char *guid); -- cgit v1.2.3 From 79a56f4c8fa629ac79ada8f4b746195bd91f09c7 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 1 Sep 2024 05:10:53 +0200 Subject: platform/x86: wmi: Remove wmi_get_event_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the WMI driver core now takes care of retrieving the WMI event data even for legacy WMI notify handlers, this function is no longer used. Remove it to prevent WMI drivers from messing up the ACPI firmware on some machines. 
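Since the core now hands the parsed event data to the handler, a legacy handler has no reason left to call wmi_get_event_data(); its new shape is roughly as follows (an editorial sketch; the integer check and printout are illustrative, a real driver would match its own event format):

    static void example_wmi_notify(union acpi_object *data, void *context)
    {
        /* the WMI core retrieves and passes the event data directly */
        if (data && data->type == ACPI_TYPE_INTEGER)
            pr_info("WMI event: %llu\n",
                    (unsigned long long)data->integer.value);
    }

    /* registered as before: wmi_install_notify_handler(guid, example_wmi_notify, NULL); */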
Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20240901031055.3030-4-W_Armin@gmx.de Signed-off-by: Hans de Goede --- include/linux/acpi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index eed105b1fbfb..3cbe4b57bc73 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -401,7 +401,6 @@ extern acpi_status wmi_set_block(const char *guid, u8 instance, extern acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, void *data); extern acpi_status wmi_remove_notify_handler(const char *guid); -extern acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out); extern bool wmi_has_guid(const char *guid); extern char *wmi_get_acpi_device_uid(const char *guid); -- cgit v1.2.3 From a4311c274e08001b129bd5ffd2a41dcbd3e0a855 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 29 Aug 2024 20:31:50 +0200 Subject: kunit: Fix kernel-doc for EXPORT_SYMBOL_IF_KUNIT While kunit/visibility.h is today not included in any generated kernel documentation, also likely due to the fact that none of the existing comments are correctly recognized as kernel-doc, but once we decide to add this header and fix the tool, there will be: ../include/kunit/visibility.h:61: warning: Function parameter or struct member 'symbol' not described in 'EXPORT_SYMBOL_IF_KUNIT' Signed-off-by: Michal Wajdeczko Reviewed-by: Rae Moar Acked-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/visibility.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/kunit/visibility.h b/include/kunit/visibility.h index 0dfe35feeec6..efff77b58dd6 100644 --- a/include/kunit/visibility.h +++ b/include/kunit/visibility.h @@ -22,6 +22,7 @@ * EXPORTED_FOR_KUNIT_TESTING namespace only if CONFIG_KUNIT is * enabled. Must use MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING) * in test file in order to use symbols. + * @symbol: the symbol identifier to export */ #define EXPORT_SYMBOL_IF_KUNIT(symbol) EXPORT_SYMBOL_NS(symbol, \ EXPORTED_FOR_KUNIT_TESTING) -- cgit v1.2.3 From 3e6a7e3cda773adacc2fa4b9623d0a8c0f904d50 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 27 Aug 2024 09:59:38 -0700 Subject: iommufd: Reorder struct forward declarations Reorder struct forward declarations to alphabetic order to simplify maintenance, as upcoming patches will add more to the list. No functional change intended. 
Link: https://patch.msgid.link/r/c5dd87100f6f01389b838c63237e28c5dd373358.1724776335.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index c2f2f6b9148e..30f832a60ccb 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -11,12 +11,12 @@ #include struct device; -struct iommufd_device; -struct page; -struct iommufd_ctx; -struct iommufd_access; struct file; struct iommu_group; +struct iommufd_access; +struct iommufd_ctx; +struct iommufd_device; +struct page; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); -- cgit v1.2.3 From 6ff4cd1160afafc12ad1603e3d2f39256e4b708d Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 27 Aug 2024 10:45:15 +0800 Subject: lib/string_choices: Add str_true_false()/str_false_true() helper Add str_true_false()/str_false_true() helper to return "true" or "false" string literal. Signed-off-by: Hongbo Li Link: https://lore.kernel.org/r/20240827024517.914100-2-lihongbo22@huawei.com Signed-off-by: Kees Cook --- include/linux/string_choices.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index 1320bcdcb89c..ebcc56b28ede 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -48,6 +48,12 @@ static inline const char *str_up_down(bool v) } #define str_down_up(v) str_up_down(!(v)) +static inline const char *str_true_false(bool v) +{ + return v ? "true" : "false"; +} +#define str_false_true(v) str_true_false(!(v)) + /** * str_plural - Return the simple pluralization based on English counts * @num: Number used for deciding pluralization -- cgit v1.2.3 From c2708ba91c3c1fba424b77de83b6fc45cbf38c46 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 5 Sep 2024 17:25:39 +0800 Subject: lib/string_choices: Introduce several opposite string choice helpers Similar to the existing str_enable_disable/str_enabled_disabled/str_on_off/str_yes_no helpers, we can add the opposite helpers. That's str_disable_enable, str_disabled_enabled, str_off_on and str_no_yes. There are currently more than 10 places in the code (except for str_disable_enable, which now has 3 use cases) that can be replaced with these helpers. Signed-off-by: Hongbo Li Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240905092540.2962122-2-lihongbo22@huawei.com Signed-off-by: Kees Cook --- include/linux/string_choices.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index ebcc56b28ede..fd067992260a 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -8,11 +8,13 @@ static inline const char *str_enable_disable(bool v) { return v ? "enable" : "disable"; } +#define str_disable_enable(v) str_enable_disable(!(v)) static inline const char *str_enabled_disabled(bool v) { return v ? "enabled" : "disabled"; } +#define str_disabled_enabled(v) str_enabled_disabled(!(v)) static inline const char *str_hi_lo(bool v) { @@ -36,11 +38,13 @@ static inline const char *str_on_off(bool v) { return v ? "on" : "off"; } +#define str_off_on(v) str_on_off(!(v)) static inline const char *str_yes_no(bool v) { return v ? 
"yes" : "no"; } +#define str_no_yes(v) str_yes_no(!(v)) static inline const char *str_up_down(bool v) { -- cgit v1.2.3 From c121d5cc3a993cdbfab46a152bdd50227a4d5e8c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 5 Sep 2024 17:25:40 +0800 Subject: lib/string_choices: Add some comments to make more clear for string choices helpers. Add some comments to explain why we should use string_choices helpers. Signed-off-by: Hongbo Li Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240905092540.2962122-3-lihongbo22@huawei.com Signed-off-by: Kees Cook --- include/linux/string_choices.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index fd067992260a..120ca0f28e95 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -2,6 +2,19 @@ #ifndef _LINUX_STRING_CHOICES_H_ #define _LINUX_STRING_CHOICES_H_ +/* + * Here provide a series of helpers in the str_$TRUE_$FALSE format (you can + * also expand some helpers as needed), where $TRUE and $FALSE are their + * corresponding literal strings. These helpers can be used in the printing + * and also in other places where constant strings are required. Using these + * helpers offers the following benefits: + * 1) Reducing the hardcoding of strings, which makes the code more elegant + * through these simple literal-meaning helpers. + * 2) Unifying the output, which prevents the same string from being printed + * in various forms, such as enable/disable, enabled/disabled, en/dis. + * 3) Deduping by the linker, which results in a smaller binary file. + */ + #include static inline const char *str_enable_disable(bool v) -- cgit v1.2.3 From 6fe0593bfc3cfab8b4ec7255152a2be40b2e49a3 Mon Sep 17 00:00:00 2001 From: Erling Ljunggren Date: Wed, 28 Sep 2022 13:21:43 +0200 Subject: media: videodev2.h: add V4L2_CAP_EDID Add capability flag to indicate that the device is an EDID-only device. Signed-off-by: Erling Ljunggren Signed-off-by: Hans Verkuil Reviewed-by: Sebastian Fricke Reviewed-by: Ricardo Ribalda Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 725e86c4bbbd..27239cb64065 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -502,6 +502,7 @@ struct v4l2_capability { #define V4L2_CAP_META_CAPTURE 0x00800000 /* Is a metadata capture device */ #define V4L2_CAP_READWRITE 0x01000000 /* read/write systemcalls */ +#define V4L2_CAP_EDID 0x02000000 /* Is an EDID-only device */ #define V4L2_CAP_STREAMING 0x04000000 /* streaming I/O ioctls */ #define V4L2_CAP_META_OUTPUT 0x08000000 /* Is a metadata output device */ -- cgit v1.2.3 From c7a29258737076a7caf9ced6a7d710cce890abe5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 12 Aug 2020 14:20:10 +0200 Subject: media: input: serio.h: add SERIO_EXTRON_DA_HD_PLUS Add a new serio ID for the Extron DA HD 4K Plus series of 4K HDMI Distribution Amplifiers. These devices support CEC over the serial port, so a new serio ID is needed to be able to associate the CEC driver. 
Signed-off-by: Hans Verkuil Acked-by: Dmitry Torokhov Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/serio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h index ed2a96f43ce4..5a2af0942c9f 100644 --- a/include/uapi/linux/serio.h +++ b/include/uapi/linux/serio.h @@ -83,5 +83,6 @@ #define SERIO_PULSE8_CEC 0x40 #define SERIO_RAINSHADOW_CEC 0x41 #define SERIO_FSIA6B 0x42 +#define SERIO_EXTRON_DA_HD_4K_PLUS 0x43 #endif /* _UAPI_SERIO_H */ -- cgit v1.2.3 From 6bb8ef90c444c2fd97208bfce42137c4add32bbb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 2 Aug 2024 12:19:48 +0200 Subject: media: cec: move cec_get/put_device to header Move cec_get/put_device to the media/cec.h header. This allows CEC drivers to use this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index 07d2ee8a3904..16b412b3131b 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -298,6 +298,37 @@ struct cec_adapter { char input_phys[40]; }; +static inline int cec_get_device(struct cec_adapter *adap) +{ + struct cec_devnode *devnode = &adap->devnode; + + /* + * Check if the cec device is available. This needs to be done with + * the devnode->lock held to prevent an open/unregister race: + * without the lock, the device could be unregistered and freed between + * the devnode->registered check and get_device() calls, leading to + * a crash. + */ + mutex_lock(&devnode->lock); + /* + * return ENODEV if the cec device has been removed + * already or if it is not registered anymore. + */ + if (!devnode->registered) { + mutex_unlock(&devnode->lock); + return -ENODEV; + } + /* and increase the device refcount */ + get_device(&devnode->dev); + mutex_unlock(&devnode->lock); + return 0; +} + +static inline void cec_put_device(struct cec_adapter *adap) +{ + put_device(&adap->devnode.dev); +} + static inline void *cec_get_drvdata(const struct cec_adapter *adap) { return adap->priv; -- cgit v1.2.3 From 1ae497c78f01855f3695b58481311ffdd429b028 Mon Sep 17 00:00:00 2001 From: Shung-Hsi Yu Date: Thu, 5 Sep 2024 13:52:32 +0800 Subject: bpf: use type_may_be_null() helper for nullable-param check Commit 980ca8ceeae6 ("bpf: check bpf_dummy_struct_ops program params for test runs") does bitwise AND between reg_type and PTR_MAYBE_NULL, which is correct, but due to type difference the compiler complains: net/bpf/bpf_dummy_struct_ops.c:118:31: warning: bitwise operation between different enumeration types ('const enum bpf_reg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 118 | if (info && (info->reg_type & PTR_MAYBE_NULL)) | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ Workaround the warning by moving the type_may_be_null() helper from verifier.c into bpf_verifier.h, and reuse it here to check whether param is nullable. 
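The call-site change this enables is essentially the following (an editorial sketch of the intended usage, mirroring the warning line quoted above):

    /* before: if (info && (info->reg_type & PTR_MAYBE_NULL)) */
    if (info && type_may_be_null(info->reg_type))
        return true;    /* the parameter may be NULL at runtime */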
Fixes: 980ca8ceeae6 ("bpf: check bpf_dummy_struct_ops program params for test runs") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404241956.HEiRYwWq-lkp@intel.com/ Signed-off-by: Shung-Hsi Yu Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240905055233.70203-1-shung-hsi.yu@suse.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8458632824a4..4513372c5bc8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -927,6 +927,11 @@ static inline bool type_is_sk_pointer(enum bpf_reg_type type) type == PTR_TO_XDP_SOCK; } +static inline bool type_may_be_null(u32 type) +{ + return type & PTR_MAYBE_NULL; +} + static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) { env->scratched_regs |= 1U << regno; -- cgit v1.2.3 From 12cb32a52eb607dc4d0e45fe6f4cf946d08da0fd Mon Sep 17 00:00:00 2001 From: David Gow Date: Thu, 5 Sep 2024 10:47:55 +0800 Subject: kunit: Fix missing kerneldoc comment Add a missing kerneldoc comment for the 'test' test context parameter, fixing the following warning: include/kunit/test.h:492: warning: Function parameter or struct member 'test' not described in 'kunit_kfree_const' Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20240827160631.67e121ed@canb.auug.org.au/ Fixes: f2c6dbd22017 ("kunit: Device wrappers should also manage driver name") Signed-off-by: David Gow Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- include/kunit/test.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 5ac237c949a0..34b71e42fb10 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -484,6 +484,7 @@ static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp /** * kunit_kfree_const() - conditionally free test managed memory + * @test: The test context object. * @x: pointer to the memory * * Calls kunit_kfree() only if @x is not in .rodata section. -- cgit v1.2.3 From 706ae6446494b4523d33b0d96ea1fd9d50ca9be6 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Wed, 4 Sep 2024 14:41:07 +0300 Subject: clk: fixed-rate: add devm_clk_hw_register_fixed_rate_parent_data() Add devm_clk_hw_register_fixed_rate_parent_data(), devres-managed helper to register fixed-rate clock with parent_data. 
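A possible use from a driver probe path (an editorial sketch; the clock name, parent fw_name and rate are made up):

    static const struct clk_parent_data example_parent = { .fw_name = "xo" };

    hw = devm_clk_hw_register_fixed_rate_parent_data(dev, "pll_ref",
                                                     &example_parent, 0,
                                                     24000000);
    if (IS_ERR(hw))
        return PTR_ERR(hw);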
Signed-off-by: Nikita Shubin Link: https://lore.kernel.org/r/20240904-devm_clk_hw_register_fixed_rate_parent_data-v1-1-7f14d6b456e5@maquefel.me Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 4a537260f655..7e43caabb54b 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -393,6 +393,20 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, #define devm_clk_hw_register_fixed_rate(dev, name, parent_name, flags, fixed_rate) \ __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), NULL, \ NULL, (flags), (fixed_rate), 0, 0, true) +/** + * devm_clk_hw_register_fixed_rate_parent_data - register fixed-rate clock with + * the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_data: parent clk data + * @flags: framework-specific flags + * @fixed_rate: non-adjustable clock rate + */ +#define devm_clk_hw_register_fixed_rate_parent_data(dev, name, parent_data, flags, \ + fixed_rate) \ + __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \ + (parent_data), (flags), (fixed_rate), 0, \ + 0, true) /** * clk_hw_register_fixed_rate_parent_hw - register fixed-rate clock with * the clock framework -- cgit v1.2.3 From 9934a1bd45b2b03f6d1204a6ae2780d3b009799f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 5 Aug 2024 10:57:31 +0200 Subject: clk: provide devm_clk_get_optional_enabled_with_rate() There are clock users in the kernel that can't use devm_clk_get_optional_enabled() as they need to set rate after getting the clock and before enabling it. Provide a managed helper that wraps these operations in the correct order. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240805-clk-new-helper-v2-1-e5fdd1e1d729@linaro.org Signed-off-by: Stephen Boyd --- include/linux/clk.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/clk.h b/include/linux/clk.h index 0fa56d672532..851a0f2cf42c 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -640,6 +640,32 @@ struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id); */ struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id); +/** + * devm_clk_get_optional_enabled_with_rate - devm_clk_get_optional() + + * clk_set_rate() + + * clk_prepare_enable() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * @rate: new clock rate + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. If no such clk is found, it returns NULL + * which serves as a dummy clk. That's the only difference compared + * to devm_clk_get_enabled(). + * + * The returned clk (if valid) is prepared and enabled and rate was set. + * + * The clock will automatically be disabled, unprepared and freed + * when the device is unbound from the bus. + */ +struct clk *devm_clk_get_optional_enabled_with_rate(struct device *dev, + const char *id, + unsigned long rate); + /** * devm_get_clk_from_child - lookup and obtain a managed reference to a * clock producer from child node. 
@@ -982,6 +1008,13 @@ static inline struct clk *devm_clk_get_optional_enabled(struct device *dev, return NULL; } +static inline struct clk * +devm_clk_get_optional_enabled_with_rate(struct device *dev, const char *id, + unsigned long rate) +{ + return NULL; +} + static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks) { -- cgit v1.2.3 From 08062af0a52107a243f7608fd972edb54ca5b7f8 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 4 Sep 2024 15:34:30 +0000 Subject: net: napi: Prevent overflow of napi_defer_hard_irqs In commit 6f8b12d661d0 ("net: napi: add hard irqs deferral feature") napi_defer_hard_irqs was added to net_device and defer_hard_irqs_count was added to napi_struct, both as type int. This value never goes below zero, so there is no reason for it to be a signed int. Change the type for both from int to u32, and add an overflow check to sysfs to limit the value to S32_MAX. The limit of S32_MAX was chosen because the practical limit before this patch was S32_MAX (anything larger was an overflow) and thus there are no behavioral changes introduced. If the extra bit is needed in the future, the limit can be raised. Before this patch: $ sudo bash -c 'echo 2147483649 > /sys/class/net/eth4/napi_defer_hard_irqs' $ cat /sys/class/net/eth4/napi_defer_hard_irqs -2147483647 After this patch: $ sudo bash -c 'echo 2147483649 > /sys/class/net/eth4/napi_defer_hard_irqs' bash: line 0: echo: write error: Numerical result out of range Similarly, /sys/class/net/XXXXX/tx_queue_len is defined as unsigned: include/linux/netdevice.h: unsigned int tx_queue_len; And has an overflow check: dev_change_tx_queue_len(..., unsigned long new_len): if (new_len != (unsigned int)new_len) return -ERANGE; Suggested-by: Jakub Kicinski Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240904153431.307932-1-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5f0dda733b..b47c00657bd0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -356,7 +356,7 @@ struct napi_struct { unsigned long state; int weight; - int defer_hard_irqs_count; + u32 defer_hard_irqs_count; unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL @@ -2075,7 +2075,7 @@ struct net_device { unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; unsigned long gro_flush_timeout; - int napi_defer_hard_irqs; + u32 napi_defer_hard_irqs; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; -- cgit v1.2.3 From de35996d4b364749194c5b473d1912578880833e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 4 Aug 2024 18:47:08 -0700 Subject: Input: matrix_keypad - remove support for platform data There are no more users of struct matrix_keypad_platform_data in the kernel, remove support for it from the driver. 
Acked-by: Arnd Bergmann Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240805014710.1961677-6-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/input/matrix_keypad.h | 48 ------------------------------------- 1 file changed, 48 deletions(-) (limited to 'include') diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index b8d8d69eba29..90867f44ab4d 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -34,52 +34,6 @@ struct matrix_keymap_data { unsigned int keymap_size; }; -/** - * struct matrix_keypad_platform_data - platform-dependent keypad data - * @keymap_data: pointer to &matrix_keymap_data - * @row_gpios: pointer to array of gpio numbers representing rows - * @col_gpios: pointer to array of gpio numbers reporesenting colums - * @num_row_gpios: actual number of row gpios used by device - * @num_col_gpios: actual number of col gpios used by device - * @col_scan_delay_us: delay, measured in microseconds, that is - * needed before we can keypad after activating column gpio - * @debounce_ms: debounce interval in milliseconds - * @clustered_irq: may be specified if interrupts of all row/column GPIOs - * are bundled to one single irq - * @clustered_irq_flags: flags that are needed for the clustered irq - * @active_low: gpio polarity - * @wakeup: controls whether the device should be set up as wakeup - * source - * @no_autorepeat: disable key autorepeat - * @drive_inactive_cols: drive inactive columns during scan, rather than - * making them inputs. - * - * This structure represents platform-specific data that use used by - * matrix_keypad driver to perform proper initialization. - */ -struct matrix_keypad_platform_data { - const struct matrix_keymap_data *keymap_data; - - const unsigned int *row_gpios; - const unsigned int *col_gpios; - - unsigned int num_row_gpios; - unsigned int num_col_gpios; - - unsigned int col_scan_delay_us; - - /* key debounce interval in milli-second */ - unsigned int debounce_ms; - - unsigned int clustered_irq; - unsigned int clustered_irq_flags; - - bool active_low; - bool wakeup; - bool no_autorepeat; - bool drive_inactive_cols; -}; - int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, const char *keymap_name, unsigned int rows, unsigned int cols, @@ -88,6 +42,4 @@ int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, int matrix_keypad_parse_properties(struct device *dev, unsigned int *rows, unsigned int *cols); -#define matrix_keypad_parse_of_params matrix_keypad_parse_properties - #endif /* _MATRIX_KEYPAD_H */ -- cgit v1.2.3 From f820b43754cdbd078a0d17122b281f42cbcd2155 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 23 Aug 2024 22:50:27 -0700 Subject: Input: zforce_ts - remove support for platfrom data There are no in-tree users of platform data and any new ones should either use device tree or static device properties, so let's remove platform data support. 
Tested-by: Andreas Kemnade # Tolino Shine2HD Link: https://lore.kernel.org/r/20240824055047.1706392-4-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/zforce_ts.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 include/linux/platform_data/zforce_ts.h (limited to 'include') diff --git a/include/linux/platform_data/zforce_ts.h b/include/linux/platform_data/zforce_ts.h deleted file mode 100644 index 2463a4a856a6..000000000000 --- a/include/linux/platform_data/zforce_ts.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* drivers/input/touchscreen/zforce.c - * - * Copyright (C) 2012-2013 MundoReader S.L. - */ - -#ifndef _LINUX_INPUT_ZFORCE_TS_H -#define _LINUX_INPUT_ZFORCE_TS_H - -struct zforce_ts_platdata { - unsigned int x_max; - unsigned int y_max; -}; - -#endif /* _LINUX_INPUT_ZFORCE_TS_H */ -- cgit v1.2.3 From 090624b7dc8389a516df3adb447a25dc0723adfe Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 5 Sep 2024 16:12:52 +0200 Subject: ALSA: pcm: add more sample rate definitions This adds a sample rate definition for 12kHz, 24kHz and 128kHz. Admittedly, just a few drivers are currently using these sample rates, but there is enough of a recurrence to justify adding definitions for them and removing some custom rate constraint code while at it. The new definitions are not added to the interval definitions, such as SNDRV_PCM_RATE_8000_44100, because it would silently add new supported rates to drivers that may or may not support them. The drivers have certainly not been tested for these new rates, so it is better to leave them out of the interval definitions. That being said, the added rates are multiples of well-known rate families, so it is very likely that a lot of devices out there actually support them. 
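A driver that does support the new rates can then advertise them directly in its PCM/DAI hardware description instead of open-coding a rate constraint, e.g. (an editorial sketch; the particular rate mix is illustrative):

    .rates = SNDRV_PCM_RATE_8000_48000 | SNDRV_PCM_RATE_12000 |
             SNDRV_PCM_RATE_24000 | SNDRV_PCM_RATE_128000,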
Signed-off-by: Jerome Brunet Reviewed-by: David Rhodes Acked-by: Mark Brown Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240905-alsa-12-24-128-v1-1-8371948d3921@baylibre.com --- include/sound/pcm.h | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 732121b934fd..c993350975a9 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -109,20 +109,23 @@ struct snd_pcm_ops { #define SNDRV_PCM_RATE_5512 (1U<<0) /* 5512Hz */ #define SNDRV_PCM_RATE_8000 (1U<<1) /* 8000Hz */ #define SNDRV_PCM_RATE_11025 (1U<<2) /* 11025Hz */ -#define SNDRV_PCM_RATE_16000 (1U<<3) /* 16000Hz */ -#define SNDRV_PCM_RATE_22050 (1U<<4) /* 22050Hz */ -#define SNDRV_PCM_RATE_32000 (1U<<5) /* 32000Hz */ -#define SNDRV_PCM_RATE_44100 (1U<<6) /* 44100Hz */ -#define SNDRV_PCM_RATE_48000 (1U<<7) /* 48000Hz */ -#define SNDRV_PCM_RATE_64000 (1U<<8) /* 64000Hz */ -#define SNDRV_PCM_RATE_88200 (1U<<9) /* 88200Hz */ -#define SNDRV_PCM_RATE_96000 (1U<<10) /* 96000Hz */ -#define SNDRV_PCM_RATE_176400 (1U<<11) /* 176400Hz */ -#define SNDRV_PCM_RATE_192000 (1U<<12) /* 192000Hz */ -#define SNDRV_PCM_RATE_352800 (1U<<13) /* 352800Hz */ -#define SNDRV_PCM_RATE_384000 (1U<<14) /* 384000Hz */ -#define SNDRV_PCM_RATE_705600 (1U<<15) /* 705600Hz */ -#define SNDRV_PCM_RATE_768000 (1U<<16) /* 768000Hz */ +#define SNDRV_PCM_RATE_12000 (1U<<3) /* 12000Hz */ +#define SNDRV_PCM_RATE_16000 (1U<<4) /* 16000Hz */ +#define SNDRV_PCM_RATE_22050 (1U<<5) /* 22050Hz */ +#define SNDRV_PCM_RATE_24000 (1U<<6) /* 24000Hz */ +#define SNDRV_PCM_RATE_32000 (1U<<7) /* 32000Hz */ +#define SNDRV_PCM_RATE_44100 (1U<<8) /* 44100Hz */ +#define SNDRV_PCM_RATE_48000 (1U<<9) /* 48000Hz */ +#define SNDRV_PCM_RATE_64000 (1U<<10) /* 64000Hz */ +#define SNDRV_PCM_RATE_88200 (1U<<11) /* 88200Hz */ +#define SNDRV_PCM_RATE_96000 (1U<<12) /* 96000Hz */ +#define SNDRV_PCM_RATE_128000 (1U<<13) /* 128000Hz */ +#define SNDRV_PCM_RATE_176400 (1U<<14) /* 176400Hz */ +#define SNDRV_PCM_RATE_192000 (1U<<15) /* 192000Hz */ +#define SNDRV_PCM_RATE_352800 (1U<<16) /* 352800Hz */ +#define SNDRV_PCM_RATE_384000 (1U<<17) /* 384000Hz */ +#define SNDRV_PCM_RATE_705600 (1U<<18) /* 705600Hz */ +#define SNDRV_PCM_RATE_768000 (1U<<19) /* 768000Hz */ #define SNDRV_PCM_RATE_CONTINUOUS (1U<<30) /* continuous range */ #define SNDRV_PCM_RATE_KNOT (1U<<31) /* supports more non-continuous rates */ -- cgit v1.2.3 From 23de126f9248a2f9218175ea0dd1c1403e334440 Mon Sep 17 00:00:00 2001 From: Pieter Van Trappen Date: Wed, 4 Sep 2024 08:27:42 +0200 Subject: net: dsa: microchip: replace unclear KSZ8830 strings Replace ksz8830 with ksz88x3 for CHIP_ID definition and other strings. This due to KSZ8830 not being an actual switch but the Chip ID shared among KSZ8863/8873 switches, impossible to differentiate from their Chip ID or Revision ID registers. Now all KSZ*_CHIP_ID macros refer to actual, existing switches which removes confusion. Signed-off-by: Pieter Van Trappen Acked-by: Arun Ramadoss Signed-off-by: David S. 
Miller --- include/linux/platform_data/microchip-ksz.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index d074019474f5..2ee1a679e592 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -27,7 +27,7 @@ enum ksz_chip_id { KSZ8795_CHIP_ID = 0x8795, KSZ8794_CHIP_ID = 0x8794, KSZ8765_CHIP_ID = 0x8765, - KSZ8830_CHIP_ID = 0x8830, + KSZ88X3_CHIP_ID = 0x8830, KSZ8864_CHIP_ID = 0x8864, KSZ8895_CHIP_ID = 0x8895, KSZ9477_CHIP_ID = 0x00947700, -- cgit v1.2.3 From 62c16f219a73c237b5bef9bd160e32fbb38794f9 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 6 Sep 2024 12:14:22 +0530 Subject: wifi: cfg80211: move DFS related members to links[] in wireless_dev A few members related to DFS handling are currently under per wireless device data structure. However, in order to support DFS with MLO, there is a need to have them on a per-link manner. Hence, as a preliminary step, move members cac_started, cac_start_time and cac_time_ms to be on a per-link basis. Since currently, link ID is not known at all places, use default value of 0 for now. Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20240906064426.2101315-5-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 192d72c8b465..727a94a6b7c3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6194,9 +6194,6 @@ enum ieee80211_ap_reg_power { * @address: The address for this device, valid only if @netdev is %NULL * @is_running: true if this is a non-netdev device that has been started, e.g. * the P2P Device. - * @cac_started: true if DFS channel availability check has been started - * @cac_start_time: timestamp (jiffies) when the dfs state was entered. - * @cac_time_ms: CAC time in ms * @ps: powersave mode is enabled * @ps_timeout: dynamic powersave timeout * @ap_unexpected_nlportid: (private) netlink port ID of application @@ -6220,6 +6217,11 @@ enum ieee80211_ap_reg_power { * unprotected beacon report * @links: array of %IEEE80211_MLD_MAX_NUM_LINKS elements containing @addr * @ap and @client for each link + * @links[].cac_started: true if DFS channel availability check has been + * started + * @links[].cac_start_time: timestamp (jiffies) when the dfs state was + * entered. + * @links[].cac_time_ms: CAC time in ms * @valid_links: bitmap describing what elements of @links are valid */ struct wireless_dev { @@ -6261,11 +6263,6 @@ struct wireless_dev { u32 owner_nlportid; bool nl_owner_dead; - /* FIXME: need to rework radar detection for MLO */ - bool cac_started; - unsigned long cac_start_time; - unsigned int cac_time_ms; - #ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { @@ -6332,6 +6329,10 @@ struct wireless_dev { struct cfg80211_internal_bss *current_bss; } client; }; + + bool cac_started; + unsigned long cac_start_time; + unsigned int cac_time_ms; } links[IEEE80211_MLD_MAX_NUM_LINKS]; u16 valid_links; }; -- cgit v1.2.3 From 81f67d60ebf290da068af96ad0c4a4e4faebdf1d Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 6 Sep 2024 12:14:23 +0530 Subject: wifi: cfg80211: handle DFS per link Currently, during starting a radar detection, no link id information is parsed and passed down. 
In order to support starting radar detection during Multi Link Operation, it is required to pass the link ID as well. Add changes to first parse and then pass the link ID in the start radar detection path. Additionally, update notification APIs to allow drivers/mac80211 to pass the link ID. However, everything is handled at link 0 only until all APIs are ready to handle it per link. Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20240906064426.2101315-6-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 727a94a6b7c3..efdea667cb92 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4837,9 +4837,9 @@ struct cfg80211_ops { int (*start_radar_detection)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, - u32 cac_time_ms); + u32 cac_time_ms, int link_id); void (*end_cac)(struct wiphy *wiphy, - struct net_device *dev); + struct net_device *dev, unsigned int link_id); int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie); int (*crit_proto_start)(struct wiphy *wiphy, @@ -8741,6 +8741,7 @@ void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac, * @chandef: chandef for the current channel * @event: type of event * @gfp: context flags + * @link_id: valid link_id for MLO operation or 0 otherwise. * * This function is called when a Channel availability check (CAC) is finished * or aborted. This must be called to notify the completion of a CAC process, @@ -8748,7 +8749,8 @@ void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac, */ void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, - enum nl80211_radar_event event, gfp_t gfp); + enum nl80211_radar_event event, gfp_t gfp, + unsigned int link_id); /** * cfg80211_background_cac_abort - Channel Availability Check offchan abort event -- cgit v1.2.3 From bca8bc0399ac2efd56e6adbed0307e10125a556c Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 6 Sep 2024 12:14:26 +0530 Subject: wifi: mac80211: handle ieee80211_radar_detected() for MLO Currently, DFS works under the assumption that there can be only one channel context in the hardware. Hence, drivers just call the function ieee80211_radar_detected(), passing the hardware structure. However, with MLO, this obviously will not work since the number of channel contexts will be more than one, and hence drivers would need to also pass the channel information on which the radar is detected. Also, when radar is detected on one of the links, the other links' CAC should not be cancelled. Hence, in order to support DFS with MLO, do the following changes - * Add channel context conf pointer as an argument to the function ieee80211_radar_detected(). During MLO, drivers would have to pass on which channel context conf radar is detected. Otherwise, drivers could just pass NULL. * ieee80211_radar_detected() will iterate over all channel contexts present and * if channel context conf is passed, only mark that as radar detected * if NULL is passed, then mark all channel contexts as radar detected * Then as usual, schedule the radar detected work. 
* In the worker, go over all the contexts again and for all such context which is marked with radar detected, cancel the ongoing CAC by calling ieee80211_dfs_cac_cancel() and then notify cfg80211 via cfg80211_radar_event(). * To cancel the CAC, pass the channel context as well where radar is detected to ieee80211_dfs_cac_cancel(). This ensures that CAC is canceled only on the links using the provided context, leaving other links unaffected. This would also help in scenarios where there is split phy 5 GHz radio, which is capable of DFS channels in both lower and upper band. In this case, simultaneous radars can be detected. Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20240906064426.2101315-9-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index adfec877f392..954dff901b69 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6748,8 +6748,11 @@ void ieee80211_cqm_beacon_loss_notify(struct ieee80211_vif *vif, gfp_t gfp); * ieee80211_radar_detected - inform that a radar was detected * * @hw: pointer as obtained from ieee80211_alloc_hw() + * @chanctx_conf: Channel context on which radar is detected. Mandatory to + * pass a valid pointer during MLO. For non-MLO %NULL can be passed */ -void ieee80211_radar_detected(struct ieee80211_hw *hw); +void ieee80211_radar_detected(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *chanctx_conf); /** * ieee80211_chswitch_done - Complete channel switch process -- cgit v1.2.3 From 56bd72e9cd8272687aa2a8eccddabc5526cd7845 Mon Sep 17 00:00:00 2001 From: Marek Maslanka Date: Mon, 12 Aug 2024 18:41:42 +0000 Subject: clocksource: acpi_pm: Add external callback for suspend/resume Provides the capability to register an external callback for the ACPI PM timer, which is called during the suspend and resume processes. Signed-off-by: Marek Maslanka Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240812184150.1079924-1-mmaslanka@google.com Signed-off-by: Daniel Lezcano --- include/linux/acpi_pmtmr.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 50d88bf1498d..0ded9220d379 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h @@ -26,6 +26,19 @@ static inline u32 acpi_pm_read_early(void) return acpi_pm_read_verified() & ACPI_PM_MASK; } +/** + * Register callback for suspend and resume event + * + * @cb Callback triggered on suspend and resume + * @data Data passed with the callback + */ +void acpi_pmtmr_register_suspend_resume_callback(void (*cb)(void *data, bool suspend), void *data); + +/** + * Remove registered callback for suspend and resume event + */ +void acpi_pmtmr_unregister_suspend_resume_callback(void); + #else static inline u32 acpi_pm_read_early(void) -- cgit v1.2.3 From 4849b2f78020cf0e3ba67777aadd07c620c91811 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 5 Sep 2024 05:14:29 +0000 Subject: ASoC: makes rtd->initialized bit field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rtd->initialized is used to know whether soc_init_pcm_runtime() was correctly fined, and used to call snd_soc_link_exit(). We don't need to have it as bool, let's make it bit-field same as other flags. 
Signed-off-by: Kuninori Morimoto Cc: Amadeusz Sławiński Cc: Cezary Rojewski Link: https://patch.msgid.link/87o752k7gq.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 81ef380b2e06..e6e359c1a2ac 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1207,8 +1207,7 @@ struct snd_soc_pcm_runtime { /* bit field */ unsigned int pop_wait:1; unsigned int fe_compr:1; /* for Dynamic PCM */ - - bool initialized; + unsigned int initialized:1; /* CPU/Codec/Platform */ int num_components; -- cgit v1.2.3 From e49dacc71ec2621ce4c422cd5605d4d06f7807b0 Mon Sep 17 00:00:00 2001 From: Wouter Verhelst Date: Mon, 12 Aug 2024 15:20:37 +0200 Subject: nbd: implement the WRITE_ZEROES command The NBD protocol defines a message for zeroing out a region of an export Add support to the kernel driver for that message. Signed-off-by: Wouter Verhelst Cc: Eric Blake Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240812133032.115134-3-w@uter.be Signed-off-by: Jens Axboe --- include/uapi/linux/nbd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index d75215f2c675..f1d468acfb25 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -42,8 +42,9 @@ enum { NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, - NBD_CMD_TRIM = 4 + NBD_CMD_TRIM = 4, /* userspace defines additional extension commands */ + NBD_CMD_WRITE_ZEROES = 6, }; /* values for flags field, these are server interaction specific. */ @@ -53,11 +54,13 @@ enum { #define NBD_FLAG_SEND_FUA (1 << 3) /* send FUA (forced unit access) */ #define NBD_FLAG_ROTATIONAL (1 << 4) /* device is rotational */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* supports WRITE_ZEROES */ /* there is a gap here to match userspace */ #define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */ /* values for cmd flags in the upper 16 bits of request type */ #define NBD_CMD_FLAG_FUA (1 << 16) /* FUA (forced unit access) op */ +#define NBD_CMD_FLAG_NO_HOLE (1 << 17) /* Do not punch a hole for WRITE_ZEROES */ /* These are client behavior specific flags. */ #define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on -- cgit v1.2.3 From 3f4c0ad490cc55db5b375a2e19c67159cb255795 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 12:14:04 +0200 Subject: mtd: nand: Rename the NAND IO iteration helper Soon a helper for iterating over blocks will be needed (for continuous read purposes). In order to clarify the intend of this helper, let's rename it with the "page" wording inside. While at it, improve the doc and fix a typo. 
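As a usage sketch (not part of this patch), callers such as the SPI-NAND core keep iterating with the nanddev_io_for_each_page() helper exactly as before; only the init helper underneath gets the "page" name. do_one_page() below is a hypothetical per-page handler used purely for illustration:

static int example_mtd_read(struct nand_device *nand, loff_t from,
			    struct mtd_oob_ops *req)
{
	struct nand_io_iter iter;
	int ret;

	/* Walks the MTD request page by page via the renamed init helper */
	nanddev_io_for_each_page(nand, NAND_PAGE_READ, from, req, &iter) {
		ret = do_one_page(nand, &iter.req);	/* hypothetical handler */
		if (ret)
			return ret;
	}

	return 0;
}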
Signed-off-by: Miquel Raynal Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/linux-mtd/20240826101412.20644-2-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index b2996dc987ff..92e06ac33815 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -906,19 +906,19 @@ static inline void nanddev_pos_next_page(struct nand_device *nand, } /** - * nand_io_iter_init - Initialize a NAND I/O iterator + * nand_io_page_iter_init - Initialize a NAND I/O iterator * @nand: NAND device * @offs: absolute offset * @req: MTD request * @iter: NAND I/O iterator * * Initializes a NAND iterator based on the information passed by the MTD - * layer. + * layer for page jumps. */ -static inline void nanddev_io_iter_init(struct nand_device *nand, - enum nand_page_io_req_type reqtype, - loff_t offs, struct mtd_oob_ops *req, - struct nand_io_iter *iter) +static inline void nanddev_io_page_iter_init(struct nand_device *nand, + enum nand_page_io_req_type reqtype, + loff_t offs, struct mtd_oob_ops *req, + struct nand_io_iter *iter) { struct mtd_info *mtd = nanddev_to_mtd(nand); @@ -990,10 +990,10 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand, * @req: MTD I/O request * @iter: NAND I/O iterator * - * Should be used for iterate over pages that are contained in an MTD request. + * Should be used for iterating over pages that are contained in an MTD request. */ #define nanddev_io_for_each_page(nand, type, start, req, iter) \ - for (nanddev_io_iter_init(nand, type, start, req, iter); \ + for (nanddev_io_page_iter_init(nand, type, start, req, iter); \ !nanddev_io_iter_end(nand, iter); \ nanddev_io_iter_next_page(nand, iter)) -- cgit v1.2.3 From 8adf1ac24ba8ee34b8fd36ebf159004ec472fca0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 12:14:05 +0200 Subject: mtd: nand: Introduce a block iterator In order to be able to iterate easily across eraseblocks rather than pages, let's introduce a block iterator inspired from the page iterator. The main usage of this iterator will be for continuous/sequential reads, where it is interesting to use a single request rather than split the requests in smaller chunks (ie. pages) that can be hardly optimized. So a "continuous" boolean get's added for this purpose. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240826101412.20644-3-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 92e06ac33815..1e4208040956 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -103,6 +103,8 @@ enum nand_page_io_req_type { * @ooblen: the number of OOB bytes to read from/write to this page * @oobbuf: buffer to store OOB data in or get OOB data from * @mode: one of the %MTD_OPS_XXX mode + * @continuous: no need to start over the operation at the end of each page, the + * NAND device will automatically prepare the next one * * This object is used to pass per-page I/O requests to NAND sub-layers. 
This * way all useful information are already formatted in a useful way and @@ -125,6 +127,7 @@ struct nand_page_io_req { void *in; } oobbuf; int mode; + bool continuous; }; const struct mtd_ooblayout_ops *nand_get_small_page_ooblayout(void); @@ -937,6 +940,43 @@ static inline void nanddev_io_page_iter_init(struct nand_device *nand, iter->req.ooblen = min_t(unsigned int, iter->oobbytes_per_page - iter->req.ooboffs, iter->oobleft); + iter->req.continuous = false; +} + +/** + * nand_io_block_iter_init - Initialize a NAND I/O iterator + * @nand: NAND device + * @offs: absolute offset + * @req: MTD request + * @iter: NAND I/O iterator + * + * Initializes a NAND iterator based on the information passed by the MTD + * layer for block jumps (no OOB) + * + * In practice only reads may leverage this iterator. + */ +static inline void nanddev_io_block_iter_init(struct nand_device *nand, + enum nand_page_io_req_type reqtype, + loff_t offs, struct mtd_oob_ops *req, + struct nand_io_iter *iter) +{ + unsigned int offs_in_eb; + + iter->req.type = reqtype; + iter->req.mode = req->mode; + iter->req.dataoffs = nanddev_offs_to_pos(nand, offs, &iter->req.pos); + iter->req.ooboffs = 0; + iter->oobbytes_per_page = 0; + iter->dataleft = req->len; + iter->oobleft = 0; + iter->req.databuf.in = req->datbuf; + offs_in_eb = (nand->memorg.pagesize * iter->req.pos.page) + iter->req.dataoffs; + iter->req.datalen = min_t(unsigned int, + nanddev_eraseblock_size(nand) - offs_in_eb, + iter->dataleft); + iter->req.oobbuf.in = NULL; + iter->req.ooblen = 0; + iter->req.continuous = true; } /** @@ -962,6 +1002,25 @@ static inline void nanddev_io_iter_next_page(struct nand_device *nand, iter->oobleft); } +/** + * nand_io_iter_next_block - Move to the next block + * @nand: NAND device + * @iter: NAND I/O iterator + * + * Updates the @iter to point to the next block. + * No OOB handling available. + */ +static inline void nanddev_io_iter_next_block(struct nand_device *nand, + struct nand_io_iter *iter) +{ + nanddev_pos_next_eraseblock(nand, &iter->req.pos); + iter->dataleft -= iter->req.datalen; + iter->req.databuf.in += iter->req.datalen; + iter->req.dataoffs = 0; + iter->req.datalen = min_t(unsigned int, nanddev_eraseblock_size(nand), + iter->dataleft); +} + /** * nand_io_iter_end - Should end iteration or not * @nand: NAND device @@ -997,6 +1056,21 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand, !nanddev_io_iter_end(nand, iter); \ nanddev_io_iter_next_page(nand, iter)) +/** + * nand_io_for_each_block - Iterate over all NAND pages contained in an MTD I/O + * request, one block at a time + * @nand: NAND device + * @start: start address to read/write from + * @req: MTD I/O request + * @iter: NAND I/O iterator + * + * Should be used for iterating over blocks that are contained in an MTD request. 
+ */ +#define nanddev_io_for_each_block(nand, type, start, req, iter) \ + for (nanddev_io_block_iter_init(nand, type, start, req, iter); \ + !nanddev_io_iter_end(nand, iter); \ + nanddev_io_iter_next_block(nand, iter)) + bool nanddev_isbad(struct nand_device *nand, const struct nand_pos *pos); bool nanddev_isreserved(struct nand_device *nand, const struct nand_pos *pos); int nanddev_markbad(struct nand_device *nand, const struct nand_pos *pos); -- cgit v1.2.3 From 631cfdd0520d19b7f4fc13b834fd9c8b46c6dbac Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 12:14:07 +0200 Subject: mtd: spi-nand: Add continuous read support A regular page read consist in: - Asking one page of content from the NAND array to be loaded in the chip's SRAM, - Waiting for the operation to be done, - Retrieving the data (I/O phase) from the chip's SRAM. When reading several sequential pages, the above operation is repeated over and over. There is however a way to optimize these accesses, by enabling continuous reads. The feature requires the NAND chip to have a second internal SRAM area plus a bit of additional internal logic to trigger another internal transfer between the NAND array and the second SRAM area while the I/O phase is ongoing. Once the first I/O phase is done, the host can continue reading more data, continuously, as the chip will automatically switch to the second SRAM content (which has already been loaded) and in turns trigger the next load into the first SRAM area again. From an instruction perspective, the command op-codes are different, but the same cycles are required. The only difference is that after a continuous read (which is stopped by a CS deassert), the host must observe a delay of tRST. However, because there is no guarantee in Linux regarding the actual state of the CS pin after a transfer (in order to speed-up the next transfer if targeting the same device), it was necessary to manually end the continuous read with a configuration register write operation. Continuous reads have two main drawbacks: * They only work on full pages (column address ignored) * Only the main data area is pulled, out-of-band bytes are not accessible. Said otherwise, the feature can only be useful with on-die ECC engines. Performance wise, measures have been performed on a Zynq platform using Macronix SPI-NAND controller with a Macronix chip (based on the flash_speed tool modified for testing sequential reads): - 1-1-1 mode: performances improved from +3% (2-pages) up to +10% after a dozen pages. - 1-1-4 mode: performances improved from +15% (2-pages) up to +40% after a dozen pages. This series is based on a previous work from Macronix engineer Jaime Liao. Signed-off-by: Miquel Raynal Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/linux-mtd/20240826101412.20644-5-miquel.raynal@bootlin.com --- include/linux/mtd/spinand.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 5c19ead60499..14dce347dc46 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -336,6 +336,7 @@ struct spinand_ondie_ecc_conf { * @op_variants.update_cache: variants of the update-cache operation * @select_target: function used to select a target/die. Required only for * multi-die chips + * @set_cont_read: enable/disable continuous cached reads * * Each SPI NAND manufacturer driver should have a spinand_info table * describing all the chips supported by the driver. 
@@ -354,6 +355,8 @@ struct spinand_info { } op_variants; int (*select_target)(struct spinand_device *spinand, unsigned int target); + int (*set_cont_read)(struct spinand_device *spinand, + bool enable); }; #define SPINAND_ID(__method, ...) \ @@ -379,6 +382,9 @@ struct spinand_info { #define SPINAND_SELECT_TARGET(__func) \ .select_target = __func, +#define SPINAND_CONT_READ(__set_cont_read) \ + .set_cont_read = __set_cont_read, + #define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants, \ __flags, ...) \ { \ @@ -422,6 +428,12 @@ struct spinand_dirmap { * passed in spi_mem_op be DMA-able, so we can't based the bufs on * the stack * @manufacturer: SPI NAND manufacturer information + * @cont_read_possible: Field filled by the core once the whole system + * configuration is known to tell whether continuous reads are + * suitable to use or not in general with this chip/configuration. + * A per-transfer check must of course be done to ensure it is + * actually relevant to enable this feature. + * @set_cont_read: Enable/disable the continuous read feature * @priv: manufacturer private data */ struct spinand_device { @@ -451,6 +463,10 @@ struct spinand_device { u8 *scratchbuf; const struct spinand_manufacturer *manufacturer; void *priv; + + bool cont_read_possible; + int (*set_cont_read)(struct spinand_device *spinand, + bool enable); }; /** -- cgit v1.2.3 From a06f2e7cc4de104b14971e08d37f9d08c256b053 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 12:14:08 +0200 Subject: mtd: spi-nand: Expose spinand_write_reg_op() This helper function will soon be used from a vendor driver, let's export it through the spinand.h header. No need for any export, as there is currently no reason for any module to need it. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240826101412.20644-6-miquel.raynal@bootlin.com --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 14dce347dc46..8dbf67ca710a 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -533,6 +533,7 @@ int spinand_match_and_init(struct spinand_device *spinand, enum spinand_readid_method rdid_method); int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); +int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val); int spinand_select_target(struct spinand_device *spinand, unsigned int target); #endif /* __LINUX_MTD_SPINAND_H */ -- cgit v1.2.3 From bb2ac59f8205165fce5aee9135bd1e2ea9b1a74b Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 16 Aug 2024 01:18:23 +0100 Subject: mfd: axp20x: AXP717: Add support for boost regulator The AXP717 also contains a boost regulator, to provide the 5V USB VBUS rail when running on battery. Add the registers to the MFD description to be able to use them from the regulator driver. 
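As a rough sketch only, the regulator driver is expected to reach these registers through the shared regmap. The enable bit below is a placeholder, since this patch only adds the register offsets, not the bit layout:

#define EXAMPLE_AXP717_BOOST_EN		BIT(4)	/* hypothetical bit position */

static int example_boost_enable(struct axp20x_dev *axp20x, bool enable)
{
	return regmap_update_bits(axp20x->regmap, AXP717_MODULE_EN_CONTROL_2,
				  EXAMPLE_AXP717_BOOST_EN,
				  enable ? EXAMPLE_AXP717_BOOST_EN : 0);
}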
Signed-off-by: Andre Przywara Reviewed-by: John Watts Acked-by: Lee Jones Reviewed-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240816001824.6028-3-andre.przywara@arm.com Signed-off-by: Mark Brown --- include/linux/mfd/axp20x.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 8c0a33a2e9ce..3758f986491c 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -115,6 +115,8 @@ enum axp20x_variants { #define AXP313A_IRQ_STATE 0x21 #define AXP717_ON_INDICATE 0x00 +#define AXP717_MODULE_EN_CONTROL_2 0x19 +#define AXP717_BOOST_CONTROL 0x1e #define AXP717_IRQ0_EN 0x40 #define AXP717_IRQ1_EN 0x41 #define AXP717_IRQ2_EN 0x42 -- cgit v1.2.3 From 22dfe2ea1d63f15d8b9661e7690ac0a03159db6a Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 16 Aug 2024 01:18:24 +0100 Subject: regulator: axp20x: AXP717: Add boost regulator The AXP717 also contains an adjustable boost regulator, to provide the 5V USB VBUS rail when running on battery. Add the regulator description that states the voltage range this regulator can cover. Signed-off-by: Andre Przywara Reviewed-by: John Watts Reviewed-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240816001824.6028-4-andre.przywara@arm.com Signed-off-by: Mark Brown --- include/linux/mfd/axp20x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 3758f986491c..e0cd66bd3b6d 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -486,6 +486,7 @@ enum { AXP717_CLDO3, AXP717_CLDO4, AXP717_CPUSLDO, + AXP717_BOOST, AXP717_REG_ID_MAX, }; -- cgit v1.2.3 From 663b0f1e141dc60ce6c09ae6afc5f213b22d13ca Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 16 Feb 2024 11:00:10 -0500 Subject: drm/amdkfd: Document and define SVM events message macro Document how to use SMI system management interface to enable and receive SVM events. Document SVM event triggers. Define SVM events message string format macro that could be used by user mode for sscanf to parse the event. Add it to uAPI header file to make it obvious that is changing uAPI in future. No functional changes. 
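As a user-space sketch (the function name and buffer handling are illustrative, not part of the uAPI), an application that has read one event line and split off the leading event id can pass the remaining payload straight to sscanf() through these macros:

#include <stdio.h>
#include <linux/kfd_ioctl.h>

/* 'msg' points at the payload that follows the leading event id. */
static void example_parse_migrate_end(const char *msg)
{
	long long ns;
	unsigned long start, size;
	unsigned int from, to;
	int pid, trigger;

	if (sscanf(msg, KFD_EVENT_FMT_MIGRATE_END(&ns, &pid, &start, &size,
						  &from, &to, &trigger)) == 7)
		printf("migrate done: pid %d, %lu pages, node %x -> %x\n",
		       pid, size, from, to);
}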
Signed-off-by: Philip Yang Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 100 +++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 71a7ce5f2d4c..717307d6b5b7 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -540,26 +540,29 @@ enum kfd_smi_event { KFD_SMI_EVENT_ALL_PROCESS = 64 }; +/* The reason of the page migration event */ enum KFD_MIGRATE_TRIGGERS { - KFD_MIGRATE_TRIGGER_PREFETCH, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, - KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, - KFD_MIGRATE_TRIGGER_TTM_EVICTION + KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU VRAM or system memory */ + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, /* GPU page fault recover */ + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, /* CPU page fault recover */ + KFD_MIGRATE_TRIGGER_TTM_EVICTION /* TTM eviction */ }; +/* The reason of user queue evition event */ enum KFD_QUEUE_EVICTION_TRIGGERS { - KFD_QUEUE_EVICTION_TRIGGER_SVM, - KFD_QUEUE_EVICTION_TRIGGER_USERPTR, - KFD_QUEUE_EVICTION_TRIGGER_TTM, - KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, - KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, - KFD_QUEUE_EVICTION_CRIU_RESTORE + KFD_QUEUE_EVICTION_TRIGGER_SVM, /* SVM buffer migration */ + KFD_QUEUE_EVICTION_TRIGGER_USERPTR, /* userptr movement */ + KFD_QUEUE_EVICTION_TRIGGER_TTM, /* TTM move buffer */ + KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, /* GPU suspend */ + KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, /* CRIU checkpoint */ + KFD_QUEUE_EVICTION_CRIU_RESTORE /* CRIU restore */ }; +/* The reason of unmap buffer from GPU event */ enum KFD_SVM_UNMAP_TRIGGERS { - KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, - KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE, - KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, /* MMU notifier CPU buffer movement */ + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,/* MMU notifier page migration */ + KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU /* Unmap to free the buffer */ }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) @@ -570,6 +573,77 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/* + * SVM event tracing via SMI system management interface + * + * Open event file descriptor + * use ioctl AMDKFD_IOC_SMI_EVENTS, pass in gpuid and return a anonymous file + * descriptor to receive SMI events. + * If calling with sudo permission, then file descriptor can be used to receive + * SVM events from all processes, otherwise, to only receive SVM events of same + * process. + * + * To enable the SVM event + * Write event file descriptor with KFD_SMI_EVENT_MASK_FROM_INDEX(event) bitmap + * mask to start record the event to the kfifo, use bitmap mask combination + * for multiple events. New event mask will overwrite the previous event mask. + * KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS) bit requires sudo + * permisson to receive SVM events from all process. + * + * To receive the event + * Application can poll file descriptor to wait for the events, then read event + * from the file into a buffer. Each event is one line string message, starting + * with the event id, then the event specific information. + * + * To decode event information + * The following event format string macro can be used with sscanf to decode + * the specific event information. + * event triggers: the reason to generate the event, defined as enum for unmap, + * eviction and migrate events. 
+ * node, from, to, prefetch_loc, preferred_loc: GPU ID, or 0 for system memory. + * addr: user mode address, in pages + * size: in pages + * pid: the process ID to generate the event + * ns: timestamp in nanosecond-resolution, starts at system boot time but + * stops during suspend + * migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update + * rw: 'W' for write page fault, 'R' for read page fault + * rescheduled: 'R' if the queue restore failed and rescheduled to try again + */ +#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ + "%x %s\n", (reset_seq_num), (reset_cause) + +#define KFD_EVENT_FMT_THERMAL_THROTTLING(bitmask, counter)\ + "%llx:%llx\n", (bitmask), (counter) + +#define KFD_EVENT_FMT_VMFAULT(pid, task_name)\ + "%x:%s\n", (pid), (task_name) + +#define KFD_EVENT_FMT_PAGEFAULT_START(ns, pid, addr, node, rw)\ + "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (rw) + +#define KFD_EVENT_FMT_PAGEFAULT_END(ns, pid, addr, node, migrate_update)\ + "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (migrate_update) + +#define KFD_EVENT_FMT_MIGRATE_START(ns, pid, start, size, from, to, prefetch_loc,\ + preferred_loc, migrate_trigger)\ + "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\ + (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger) + +#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\ + "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\ + (from), (to), (migrate_trigger) + +#define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\ + "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger) + +#define KFD_EVENT_FMT_QUEUE_RESTORE(ns, pid, node, rescheduled)\ + "%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled) + +#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns, pid, addr, size, node, unmap_trigger)\ + "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ + (node), (unmap_trigger) + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- cgit v1.2.3 From 78f76b09c915d7281317d8e082c4c02f325a0366 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 17 Jul 2024 17:48:16 +0900 Subject: ata: libata: Move sata_std_hardreset() definition to libata-sata.c Unlike ata_std_prereset() and ata_std_postreset(), the function sata_std_hardreset() applies only to SATA devices, as its name implies. So move its definition to libata-sata.c. Together with this, also move the definition of sata_port_ops to libata-sata.c, where it belongs. 
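As an illustration of why the helper sits naturally next to sata_port_ops, a typical SATA LLDD keeps wiring it up unchanged (example_port_ops is a hypothetical driver structure, shown only as a sketch):

static struct ata_port_operations example_port_ops = {
	.inherits	= &sata_port_ops,
	.hardreset	= sata_std_hardreset,	/* now defined in libata-sata.c */
};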
Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Hannes Reinecke --- include/linux/libata.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 6552e90753ae..d52ae7723c05 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1104,8 +1104,6 @@ static inline bool ata_port_is_frozen(const struct ata_port *ap) extern int ata_std_prereset(struct ata_link *link, unsigned long deadline); extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, int (*check_ready)(struct ata_link *link)); -extern int sata_std_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline); extern void ata_std_postreset(struct ata_link *link, unsigned int *classes); extern struct ata_host *ata_host_alloc(struct device *dev, int n_ports); @@ -1229,6 +1227,8 @@ extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); +int sata_std_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); extern int sata_link_hardreset(struct ata_link *link, const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); @@ -1256,6 +1256,11 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) return -EOPNOTSUPP; } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } +static inline int sata_std_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + return -EOPNOTSUPP; +} static inline int sata_link_hardreset(struct ata_link *link, const unsigned int *timing, unsigned long deadline, -- cgit v1.2.3 From 10e807637f288f9963340a30ae8da9bb06beca3c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 10 Jun 2024 19:11:31 +0900 Subject: ata: libata: Rename ata_eh_read_sense_success_ncq_log() The function ata_eh_read_sense_success_ncq_log() does more that just reading the sense data for successful NCQ commands log page as it also sets the sense data for all commands listed in the log page. Rename this function to ata_eh_get_ncq_success_sense() to better describe what the function does. Furthermore, since this function is only called from ata_eh_get_success_sense() in libata-eh.c, there is no need to export it and its declaration can be moved to drivers/ata/libata.h. To be consistent with this change, the function ata_eh_read_sense_success_non_ncq() is also renamed to ata_eh_get_non_ncq_success_sense(). 
Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Hannes Reinecke --- include/linux/libata.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index d52ae7723c05..55a6b57742bc 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1234,7 +1234,6 @@ extern int sata_link_hardreset(struct ata_link *link, bool *online, int (*check_ready)(struct ata_link *)); extern int sata_link_resume(struct ata_link *link, const unsigned int *params, unsigned long deadline); -extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else static inline const unsigned int * @@ -1277,10 +1276,6 @@ static inline int sata_link_resume(struct ata_link *link, { return -EOPNOTSUPP; } -static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link) -{ - return -EOPNOTSUPP; -} static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, -- cgit v1.2.3 From da65bbdd3bc1e8d2193e01167a413d90d9988c04 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 28 Aug 2024 11:07:43 +0900 Subject: ata: libata: Move sector_buf from struct ata_port to struct ata_device The 512B buffer sector_buf field of struct ata_port is used for scanning devices as well as during error recovery with ata EH. This buffer is thus useless if a port does not have a device connected to it. And also given that commands using this buffer are issued to devices, and not to ports, move this buffer definition from struct ata_port to struct ata_device. This change slightly increases system memory usage for systems using a port-multiplier as in that case we do not need a per-device buffer for scanning devices (PMP does not allow parallel scanning) nor for EH (as when entering EH we are guaranteed that all commands to all devices connected to the PMP have completed or have been aborted). However, this change reduces memory usage on systems that have many ports with only few devices rives connected, which is a much more common use case than the PMP use case. Suggested-by: Niklas Cassel Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel --- include/linux/libata.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 55a6b57742bc..aac38dcd2230 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -769,6 +769,9 @@ struct ata_device { int spdn_cnt; /* ering is CLEAR_END, read comment above CLEAR_END */ struct ata_ering ering; + + /* For EH */ + u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; /* Fields between ATA_DEVICE_CLEAR_BEGIN and ATA_DEVICE_CLEAR_END are @@ -916,7 +919,6 @@ struct ata_port { #endif /* owned by EH */ u8 *ncq_sense_buf; - u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; /* The following initializer overrides a method to NULL whether one of -- cgit v1.2.3 From 602bcf212637633d537ee74bf39c6bc5722efb9b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 17 Jul 2024 17:55:31 +0900 Subject: ata: libata: Improve CDL resource management The ncq_sense_buf buffer field of struct ata_port is allocated and used only for devices that support the Command Duration Limits (CDL) feature. 
However, the cdl buffer of struct ata_device, which is used to cache the command duration limits log page for devices supporting CDL is always allocated as part of struct ata_device, which is wasteful of memory for devices that do not support this feature. Clean this up by defining both buffers as part of the new ata_cdl structure and allocating this structure only for devices that support the CDL feature. This new structure is attached to struct ata_device using the cdl pointer. The functions ata_dev_init_cdl_resources() and ata_dev_cleanup_cdl_resources() are defined to manage this new structure allocation, initialization and freeing when a port is removed or a device disabled. ata_dev_init_cdl_resources() is called from ata_dev_config_cdl() only for devices that support CDL. ata_dev_cleanup_cdl_resources() is called from ata_dev_free_resources() to free the ata_cdl structure when a device is being disabled by EH. Note that the name of the former cdl log buffer of struct ata_device is changed to desc_log_buf to make it clearer that it is a buffer for the limit descriptors log page. This change reduces the size of struct ata_device, thus reducing memory usage for ATA devices that do not support the CDL feature. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel --- include/linux/libata.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index aac38dcd2230..9b4a6ff03235 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -700,6 +700,21 @@ struct ata_cpr_log { struct ata_cpr cpr[] __counted_by(nr_cpr); }; +struct ata_cdl { + /* + * Buffer to cache the CDL log page 18h (command duration descriptors) + * for SCSI-ATA translation. + */ + u8 desc_log_buf[ATA_LOG_CDL_SIZE]; + + /* + * Buffer to handle reading the sense data for successful NCQ Commands + * log page for commands using a CDL with one of the limits policy set + * to 0xD (successful completion with sense data available bit set). + */ + u8 ncq_sense_log_buf[ATA_LOG_SENSE_NCQ_SIZE]; +}; + struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ @@ -762,8 +777,8 @@ struct ata_device { /* Concurrent positioning ranges */ struct ata_cpr_log *cpr_log; - /* Command Duration Limits log support */ - u8 cdl[ATA_LOG_CDL_SIZE]; + /* Command Duration Limits support */ + struct ata_cdl *cdl; /* error history */ int spdn_cnt; @@ -917,8 +932,6 @@ struct ata_port { #ifdef CONFIG_ATA_ACPI struct ata_acpi_gtm __acpi_init_gtm; /* use ata_acpi_init_gtm() */ #endif - /* owned by EH */ - u8 *ncq_sense_buf; }; /* The following initializer overrides a method to NULL whether one of -- cgit v1.2.3 From b12891c7b6d2c29a07b2da5589ee469d7bf37254 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 6 Sep 2024 11:34:16 +0200 Subject: ALSA: IEC958 definition for consumer status channel update Add 128kHz, 352.4kHz, 384kHz and 705.6kHz. These definitions have been found working on eARC using a Murideo Seven Generator. 
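A minimal sketch of how a driver would program one of the new rates; because 128 kHz and 705.6 kHz encode into bit 7 as well, the whole (widened) IEC958_AES3_CON_FS mask must be cleared before OR-ing in the new code:

static void example_set_fs(u8 *status, unsigned int rate)
{
	status[3] &= ~IEC958_AES3_CON_FS;	/* clears bit 7 and bits 3..0 */

	switch (rate) {
	case 128000:
		status[3] |= IEC958_AES3_CON_FS_128000;
		break;
	case 352400:
		status[3] |= IEC958_AES3_CON_FS_352400;
		break;
	case 705600:
		status[3] |= IEC958_AES3_CON_FS_705600;
		break;
	default:
		status[3] |= IEC958_AES3_CON_FS_NOTID;
		break;
	}
}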
Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20240906093422.2976550-1-jbrunet@baylibre.com Signed-off-by: Takashi Iwai --- include/sound/asoundef.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/asoundef.h b/include/sound/asoundef.h index 9fdeac19dadb..09b2c3dffb30 100644 --- a/include/sound/asoundef.h +++ b/include/sound/asoundef.h @@ -110,18 +110,22 @@ #define IEC958_AES2_CON_SOURCE_UNSPEC (0<<0) /* unspecified */ #define IEC958_AES2_CON_CHANNEL (15<<4) /* mask - channel number */ #define IEC958_AES2_CON_CHANNEL_UNSPEC (0<<4) /* unspecified */ -#define IEC958_AES3_CON_FS (15<<0) /* mask - sample frequency */ +#define IEC958_AES3_CON_FS ((1<<7) | (15<<0)) /* mask - sample frequency */ #define IEC958_AES3_CON_FS_44100 (0<<0) /* 44.1kHz */ #define IEC958_AES3_CON_FS_NOTID (1<<0) /* non indicated */ #define IEC958_AES3_CON_FS_48000 (2<<0) /* 48kHz */ #define IEC958_AES3_CON_FS_32000 (3<<0) /* 32kHz */ #define IEC958_AES3_CON_FS_22050 (4<<0) /* 22.05kHz */ +#define IEC958_AES3_CON_FS_384000 (5<<0) /* 384kHz */ #define IEC958_AES3_CON_FS_24000 (6<<0) /* 24kHz */ #define IEC958_AES3_CON_FS_88200 (8<<0) /* 88.2kHz */ #define IEC958_AES3_CON_FS_768000 (9<<0) /* 768kHz */ #define IEC958_AES3_CON_FS_96000 (10<<0) /* 96kHz */ #define IEC958_AES3_CON_FS_176400 (12<<0) /* 176.4kHz */ +#define IEC958_AES3_CON_FS_352400 (13<<0) /* 352.4kHz */ #define IEC958_AES3_CON_FS_192000 (14<<0) /* 192kHz */ +#define IEC958_AES3_CON_FS_128000 ((1<<7) | (11<<0)) /* 128kHz */ +#define IEC958_AES3_CON_FS_705600 ((1<<7) | (13<<0)) /* 705.6kHz */ #define IEC958_AES3_CON_CLOCK (3<<4) /* mask - clock accuracy */ #define IEC958_AES3_CON_CLOCK_1000PPM (0<<4) /* 1000 ppm */ #define IEC958_AES3_CON_CLOCK_50PPM (1<<4) /* 50 ppm */ -- cgit v1.2.3 From 446216bd8e5d4a3ffc9738f6adffc98fbfdcc582 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 8 Sep 2024 13:05:48 +0900 Subject: firewire: core: expose kernel API to schedule work item to process isochronous context In packet-per-buffer mode for isochronous context of 1394 OHCI, software can schedule hardIRQ to the buffer in which the content of isochronous packet is processed. The actual behaviour is different between isochronous receive (IR) and transmit (IT) contexts in respect to isochronous cycle in which the hardIRQ occurs. In IR context, the hardIRQ occurs when the buffer is filled actually by the content of received packet. If there are any isochronous cycles in which the packet transmission is skipped, it is postponed to generate the hardIRQ in respect to the isochronous cycle. In IT context, software can schedule the content of packet every isochronous cycle including skipping, therefore the hardIRQ occurs in the isochronous cycle to which the software scheduled. ALSA firewire stack uses the packet-per-buffer mode for both IR/IT contexts. To process time stamp per packet (or per sample in some cases) steadily for media clock recovery against unexpected transmission skips, it uses an IT context to operate all of isochronous contexts by calls of fw_iso_context_flush_completions() in the bottom-half of hardIRQ for the IT context. Although it looks well to handle all of isochronous contexts in a single bottom-half context, it relatively takes longer time. In the future code integration (not yet), it is possible to apply parallelism method to process these context. In the case, it is useful to allow unit drivers to schedule work items to process these isochronous contexts. 
As a preparation, this commit exposes fw_iso_context_schedule_flush_completions() as a kernel API available by unit drivers. It is renamed from fw_iso_context_queue_work() since it is a counter part of fw_iso_context_flush_completions(). Link: https://lore.kernel.org/r/20240908040549.75304-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 72f497b61739..f815d12deda0 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -531,6 +531,23 @@ int fw_iso_context_queue(struct fw_iso_context *ctx, unsigned long payload); void fw_iso_context_queue_flush(struct fw_iso_context *ctx); int fw_iso_context_flush_completions(struct fw_iso_context *ctx); + +/** + * fw_iso_context_schedule_flush_completions() - schedule work item to process isochronous context. + * @ctx: the isochronous context + * + * Schedule a work item on workqueue to process the isochronous context. The registered callback + * function is called in the worker if some packets have been already transferred since the last + * time. If it is required to process the context in the current context, + * fw_iso_context_flush_completions() is available instead. + * + * Context: Any context. + */ +static inline void fw_iso_context_schedule_flush_completions(struct fw_iso_context *ctx) +{ + queue_work(ctx->card->isoc_wq, &ctx->work); +} + int fw_iso_context_start(struct fw_iso_context *ctx, int cycle, int sync, int tags); int fw_iso_context_stop(struct fw_iso_context *ctx); -- cgit v1.2.3 From 1d07085402d122f223bda3f8b72bea37a46ee0c9 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 7 Sep 2024 16:27:20 +0800 Subject: smp: Mark smp_prepare_boot_cpu() __init smp_prepare_boot_cpu() is only called during boot, hence mark it as __init. Signed-off-by: Bibo Mao Signed-off-by: Thomas Gleixner Reviewed-by: Huacai Chen Link: https://lore.kernel.org/all/20240907082720.452148-1-maobibo@loongson.cn --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index fcd61dfe2af3..6a0813c905d0 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -109,7 +109,7 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func, * Architecture specific boot CPU setup. Defined as empty weak function in * init/main.c. Architectures can override it. */ -void smp_prepare_boot_cpu(void); +void __init smp_prepare_boot_cpu(void); #ifdef CONFIG_SMP -- cgit v1.2.3 From c259acab839e57eab0318f32da4ae803a8d59397 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 4 Sep 2024 07:13:05 -0700 Subject: ptp/ioctl: support MONOTONIC{,_RAW} timestamps for PTP_SYS_OFFSET_EXTENDED The ability to read the PHC (Physical Hardware Clock) alongside multiple system clocks is currently dependent on the specific hardware architecture. This limitation restricts the use of PTP_SYS_OFFSET_PRECISE to certain hardware configurations. The generic soultion which would work across all architectures is to read the PHC along with the latency to perform PHC-read as offered by PTP_SYS_OFFSET_EXTENDED which provides pre and post timestamps. However, these timestamps are currently limited to the CLOCK_REALTIME timebase. Since CLOCK_REALTIME is affected by NTP (or similar time synchronization services), it can experience significant jumps forward or backward. 
This hinders the precise latency measurements that PTP_SYS_OFFSET_EXTENDED is designed to provide. This problem could be addressed by supporting MONOTONIC_RAW timestamps within PTP_SYS_OFFSET_EXTENDED. Unlike CLOCK_REALTIME or CLOCK_MONOTONIC, the MONOTONIC_RAW timebase is unaffected by NTP adjustments. This enhancement can be implemented by utilizing one of the three reserved words within the PTP_SYS_OFFSET_EXTENDED struct to pass the clock-id for timestamps. The current behavior aligns with clock-id for CLOCK_REALTIME timebase (value of 0), ensuring backward compatibility of the UAPI. Signed-off-by: Mahesh Bandewar Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 36 ++++++++++++++++++++++++++++++++---- include/uapi/linux/ptp_clock.h | 24 ++++++++++++++++++------ 2 files changed, 50 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 6e4b8206c7d0..c892d22ce0a7 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -47,10 +47,12 @@ struct system_device_crosststamp; * struct ptp_system_timestamp - system time corresponding to a PHC timestamp * @pre_ts: system timestamp before capturing PHC * @post_ts: system timestamp after capturing PHC + * @clockid: clock-base used for capturing the system timestamps */ struct ptp_system_timestamp { struct timespec64 pre_ts; struct timespec64 post_ts; + clockid_t clockid; }; /** @@ -457,14 +459,40 @@ static inline ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, static inline void ptp_read_system_prets(struct ptp_system_timestamp *sts) { - if (sts) - ktime_get_real_ts64(&sts->pre_ts); + if (sts) { + switch (sts->clockid) { + case CLOCK_REALTIME: + ktime_get_real_ts64(&sts->pre_ts); + break; + case CLOCK_MONOTONIC: + ktime_get_ts64(&sts->pre_ts); + break; + case CLOCK_MONOTONIC_RAW: + ktime_get_raw_ts64(&sts->pre_ts); + break; + default: + break; + } + } } static inline void ptp_read_system_postts(struct ptp_system_timestamp *sts) { - if (sts) - ktime_get_real_ts64(&sts->post_ts); + if (sts) { + switch (sts->clockid) { + case CLOCK_REALTIME: + ktime_get_real_ts64(&sts->post_ts); + break; + case CLOCK_MONOTONIC: + ktime_get_ts64(&sts->post_ts); + break; + case CLOCK_MONOTONIC_RAW: + ktime_get_raw_ts64(&sts->post_ts); + break; + default: + break; + } + } } #endif diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 053b40d642de..18eefa6d93d6 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -155,13 +155,25 @@ struct ptp_sys_offset { struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; }; +/* + * ptp_sys_offset_extended - data structure for IOCTL operation + * PTP_SYS_OFFSET_EXTENDED + * + * @n_samples: Desired number of measurements. + * @clockid: clockid of a clock-base used for pre/post timestamps. + * @rsv: Reserved for future use. + * @ts: Array of samples in the form [pre-TS, PHC, post-TS]. The + * kernel provides @n_samples. + * + * Starting from kernel 6.12 and onwards, the first word of the reserved-field + * is used for @clockid. That's backward compatible since previous kernel + * expect all three reserved words (@rsv[3]) to be 0 while the clockid (first + * word in the new structure) for CLOCK_REALTIME is '0'. + */ struct ptp_sys_offset_extended { - unsigned int n_samples; /* Desired number of measurements. */ - unsigned int rsv[3]; /* Reserved for future use. 
*/ - /* - * Array of [system, phc, system] time stamps. The kernel will provide - * 3*n_samples time stamps. - */ + unsigned int n_samples; + __kernel_clockid_t clockid; + unsigned int rsv[2]; struct ptp_clock_time ts[PTP_MAX_SAMPLES][3]; }; -- cgit v1.2.3 From 1fcb932c8b5ce86219d7dedcd63659351a43291c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Jul 2024 00:56:40 +0200 Subject: rcu/nocb: Simplify (de-)offloading state machine Now that the (de-)offloading process can only apply to offline CPUs, there is no more concurrency between rcu_core and nocb kthreads. Also the mutation now happens on empty queues. Therefore the state machine can be reduced to a single bit called SEGCBLIST_OFFLOADED. Simplify the transition as follows: * Upon offloading: queue the rdp to be added to the rcuog list and wait for the rcuog kthread to set the SEGCBLIST_OFFLOADED bit. Unpark rcuo kthread. * Upon de-offloading: Park rcuo kthread. Queue the rdp to be removed from the rcuog list and wait for the rcuog kthread to clear the SEGCBLIST_OFFLOADED bit. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcu_segcblist.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 1ef1bb54853d..2fdc2208f1ca 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -185,9 +185,7 @@ struct rcu_cblist { * ---------------------------------------------------------------------------- */ #define SEGCBLIST_ENABLED BIT(0) -#define SEGCBLIST_LOCKING BIT(1) -#define SEGCBLIST_KTHREAD_GP BIT(2) -#define SEGCBLIST_OFFLOADED BIT(3) +#define SEGCBLIST_OFFLOADED BIT(1) struct rcu_segcblist { struct rcu_head *head; -- cgit v1.2.3 From bd7c8ff9fef4b21a97f9b30a7364845ee6eaaf23 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Wed, 4 Sep 2024 15:04:53 +0200 Subject: treewide: Fix wrong singular form of jiffies in comments There are several comments all over the place, which uses a wrong singular form of jiffies. Replace 'jiffie' by 'jiffy'. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: Geert Uytterhoeven # m68k Link: https://lore.kernel.org/all/20240904-devel-anna-maria-b4-timers-flseep-v1-3-e98760256370@linutronix.de --- include/linux/jiffies.h | 2 +- include/linux/timekeeper_internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index d9f1435a5a13..1220f0fbe5bf 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -418,7 +418,7 @@ extern unsigned long preset_lpj; #define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\ TICK_NSEC -1) / (u64)TICK_NSEC)) /* - * The maximum jiffie value is (MAX_INT >> 1). Here we translate that + * The maximum jiffy value is (MAX_INT >> 1). Here we translate that * into seconds. The 64-bit case will overflow if we are not careful, * so use the messy SH_DIV macro to do it. Still all constants. 
*/ diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 84ff2844df2a..902c20ef495a 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -73,7 +73,7 @@ struct tk_read_base { * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) * * Note: For timespec(64) based interfaces wall_to_monotonic is what - * we need to add to xtime (or xtime corrected for sub jiffie times) + * we need to add to xtime (or xtime corrected for sub jiffy times) * to get to monotonic time. Monotonic is pegged at zero at system * boot time, so wall_to_monotonic will be negative, however, we will * ALWAYS keep the tv_nsec part positive so we can use the usual -- cgit v1.2.3 From 4e1b5586051f0e9aef9b0ca375ef2cd1a8987e0c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Sep 2024 11:03:08 +0200 Subject: wifi: cfg80211: fix kernel-doc for per-link data There cannot be brackets in kernel-doc, remove them. Reported-by: Stephen Rothwell Fixes: 62c16f219a73 ("wifi: cfg80211: move DFS related members to links[] in wireless_dev") Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index efdea667cb92..69ec1eb41a09 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6217,11 +6217,11 @@ enum ieee80211_ap_reg_power { * unprotected beacon report * @links: array of %IEEE80211_MLD_MAX_NUM_LINKS elements containing @addr * @ap and @client for each link - * @links[].cac_started: true if DFS channel availability check has been + * @links.cac_started: true if DFS channel availability check has been * started - * @links[].cac_start_time: timestamp (jiffies) when the dfs state was + * @links.cac_start_time: timestamp (jiffies) when the dfs state was * entered. - * @links[].cac_time_ms: CAC time in ms + * @links.cac_time_ms: CAC time in ms * @valid_links: bitmap describing what elements of @links are valid */ struct wireless_dev { -- cgit v1.2.3 From ca229bdbef29be207c8666f106acc3bf50736c05 Mon Sep 17 00:00:00 2001 From: Cheng Ming Lin Date: Mon, 9 Sep 2024 17:26:42 +0800 Subject: mtd: spinand: Add support for setting plane select bits Add two flags for inserting the Plane Select bit into the column address during the write_to_cache and the read_from_cache operation. Add the SPINAND_HAS_PROG_PLANE_SELECT_BIT flag for serial NAND flash that require inserting the Plane Select bit into the column address during the write_to_cache operation. Add the SPINAND_HAS_READ_PLANE_SELECT_BIT flag for serial NAND flash that require inserting the Plane Select bit into the column address during the read_from_cache operation. 
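A sketch of how the core is expected to consume the two flags when building the column address; the actual plane-select bit position is chip specific and not defined by this patch, so plane_bit below is purely illustrative:

static u16 example_adjust_column(struct spinand_device *spinand,
				 const struct nand_page_io_req *req,
				 u16 column, u16 plane_bit)
{
	bool writing = req->type == NAND_PAGE_WRITE;

	if ((writing && (spinand->flags & SPINAND_HAS_PROG_PLANE_SELECT_BIT)) ||
	    (!writing && (spinand->flags & SPINAND_HAS_READ_PLANE_SELECT_BIT)))
		column |= plane_bit;	/* chip-specific plane-select bit */

	return column;
}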
Signed-off-by: Cheng Ming Lin Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240909092643.2434479-2-linchengming884@gmail.com --- include/linux/mtd/spinand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 8dbf67ca710a..702e5fb13dae 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -312,6 +312,8 @@ struct spinand_ecc_info { #define SPINAND_HAS_QE_BIT BIT(0) #define SPINAND_HAS_CR_FEAT_BIT BIT(1) +#define SPINAND_HAS_PROG_PLANE_SELECT_BIT BIT(2) +#define SPINAND_HAS_READ_PLANE_SELECT_BIT BIT(3) /** * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure -- cgit v1.2.3 From d688d65a847ff910146eff51e70f4b7049895eb5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:49 +0200 Subject: fs: add generic_llseek_cookie() This is similar to generic_file_llseek() but allows the caller to specify a cookie that will be updated to indicate that a seek happened. Caller's requiring that information in their readdir implementations can use that. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-8-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 58c91a52cad1..3e6b3c1afb31 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3202,6 +3202,8 @@ extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); +loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, + u64 *cookie); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); -- cgit v1.2.3 From 88d2ae0e6eb84e1ed58f339c6a0de16c24fa2a60 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 8 Aug 2024 11:18:21 -0400 Subject: inode: make __iget() a static inline bcachefs is switching to an rhashtable for vfs inodes instead of the standard inode.c hashtable, so we need this exported, or - a static inline makes more sense for a single atomic_inc(). 
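Usage stays exactly as with the out-of-line version; a minimal sketch:

/* Grab an extra reference; inode->i_lock must be held, as documented. */
static struct inode *example_grab_inode(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	__iget(inode);
	spin_unlock(&inode->i_lock);

	return inode;	/* the caller eventually drops the reference with iput() */
}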
Signed-off-by: Kent Overstreet --- include/linux/fs.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..8fc4bad3b6ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3094,7 +3094,14 @@ static inline bool is_zero_ino(ino_t ino) return (u32)ino == 0; } -extern void __iget(struct inode * inode); +/* + * inode->i_lock must be held + */ +static inline void __iget(struct inode *inode) +{ + atomic_inc(&inode->i_count); +} + extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); -- cgit v1.2.3 From f6594633817374a64a5fb31007bd810cad1425ff Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 19:00:33 -0400 Subject: lib/generic-radix-tree.c: genradix_ptr_inlined() Provide an inlined fast path Signed-off-by: Kent Overstreet --- include/linux/generic-radix-tree.h | 75 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'include') diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index f3512fddf3d7..8a3e1e886d1c 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -48,6 +48,49 @@ struct genradix_root; #define GENRADIX_NODE_SHIFT 9 #define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT) +#define GENRADIX_ARY (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *)) +#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) + +/* depth that's needed for a genradix that can address up to ULONG_MAX: */ +#define GENRADIX_MAX_DEPTH \ + DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT) + +#define GENRADIX_DEPTH_MASK \ + ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) + +static inline int genradix_depth_shift(unsigned depth) +{ + return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth; +} + +/* + * Returns size (of data, in bytes) that a tree of a given depth holds: + */ +static inline size_t genradix_depth_size(unsigned depth) +{ + return 1UL << genradix_depth_shift(depth); +} + +static inline unsigned genradix_root_to_depth(struct genradix_root *r) +{ + return (unsigned long) r & GENRADIX_DEPTH_MASK; +} + +static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r) +{ + return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); +} + +struct genradix_node { + union { + /* Interior node: */ + struct genradix_node *children[GENRADIX_ARY]; + + /* Leaf: */ + u8 data[GENRADIX_NODE_SIZE]; + }; +}; + struct __genradix { struct genradix_root *root; }; @@ -128,6 +171,30 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size) #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) +static inline void *__genradix_ptr_inlined(struct __genradix *radix, size_t offset) +{ + struct genradix_root *r = READ_ONCE(radix->root); + struct genradix_node *n = genradix_root_to_node(r); + unsigned level = genradix_root_to_depth(r); + unsigned shift = genradix_depth_shift(level); + + if (unlikely(ilog2(offset) >= genradix_depth_shift(level))) + return NULL; + + while (n && shift > GENRADIX_NODE_SHIFT) { + shift -= GENRADIX_ARY_SHIFT; + n = n->children[offset >> shift]; + offset &= (1UL << shift) - 1; + } + + return n ? 
&n->data[offset] : NULL; +} + +#define genradix_ptr_inlined(_radix, _idx) \ + (__genradix_cast(_radix) \ + __genradix_ptr_inlined(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx))) + void *__genradix_ptr(struct __genradix *, size_t); /** @@ -144,6 +211,14 @@ void *__genradix_ptr(struct __genradix *, size_t); void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); +#define genradix_ptr_alloc_inlined(_radix, _idx, _gfp) \ + (__genradix_cast(_radix) \ + (__genradix_ptr_inlined(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx)) ?: \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _gfp))) + /** * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it * if necessary -- cgit v1.2.3 From b3f9da79e7783ca4f1c1d4214af976c548629c1d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 23:14:44 -0400 Subject: lib/generic-radix-tree.c: add preallocation Signed-off-by: Kent Overstreet --- include/linux/generic-radix-tree.h | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 8a3e1e886d1c..5b51c3d582d6 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -41,6 +41,7 @@ #include #include #include +#include #include struct genradix_root; @@ -81,6 +82,10 @@ static inline struct genradix_node *genradix_root_to_node(struct genradix_root * return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); } +struct __genradix { + struct genradix_root *root; +}; + struct genradix_node { union { /* Interior node: */ @@ -91,9 +96,15 @@ struct genradix_node { }; }; -struct __genradix { - struct genradix_root *root; -}; +static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) +{ + return kzalloc(GENRADIX_NODE_SIZE, gfp_mask); +} + +static inline void genradix_free_node(struct genradix_node *node) +{ + kfree(node); +} /* * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE: @@ -209,7 +220,8 @@ void *__genradix_ptr(struct __genradix *, size_t); __genradix_ptr(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx))) -void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); +void *__genradix_ptr_alloc(struct __genradix *, size_t, + struct genradix_node **, gfp_t); #define genradix_ptr_alloc_inlined(_radix, _idx, _gfp) \ (__genradix_cast(_radix) \ @@ -217,7 +229,15 @@ void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); __genradix_idx_to_offset(_radix, _idx)) ?: \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ - _gfp))) + NULL, _gfp))) + +#define genradix_ptr_alloc_preallocated_inlined(_radix, _idx, _new_node, _gfp)\ + (__genradix_cast(_radix) \ + (__genradix_ptr_inlined(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx)) ?: \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _new_node, _gfp))) /** * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it @@ -232,7 +252,13 @@ void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); (__genradix_cast(_radix) \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ - _gfp)) + NULL, _gfp)) + +#define genradix_ptr_alloc_preallocated(_radix, _idx, _new_node, _gfp)\ + (__genradix_cast(_radix) \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _new_node, _gfp)) struct 
genradix_iter { size_t offset; -- cgit v1.2.3 From f877f1d81b2ffcac341ff62ae076d7ad5ba83f46 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 9 Sep 2024 23:00:18 +0900 Subject: firewire: core: use mutex to coordinate concurrent calls to flush completions In current implementation, test_and_set_bit_lock() is used to mediate concurrent calls of ohci_flush_iso_completions(). However, the ad-hoc usage of atomic operations is not preferable. This commit uses mutex_trylock() as the similar operations. The core function is responsible for the mediation, instead of 1394 OHCI driver. Link: https://lore.kernel.org/r/20240909140018.65289-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index f815d12deda0..19e8c5f9537c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -512,6 +512,7 @@ union fw_iso_callback { struct fw_iso_context { struct fw_card *card; struct work_struct work; + struct mutex flushing_completions_mutex; int type; int channel; int speed; -- cgit v1.2.3 From 34c626c3004a73ee7da5072be48ddff9c2902902 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 16 Jan 2023 01:16:43 +0200 Subject: net/mlx5: Added missing mlx5_ifc definition for HW Steering Add mlx5_ifc definitions that are required for HWS support. Note that due to change in the mlx5_ifc_flow_table_context_bits structure that now includes both SWS and HWS bits in a union, this patch also includes small change in one of SWS files that was required for compilation. Reviewed-by: Hamdan Agbariya Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 189 +++++++++++++++++++++++++++++++++++------- 1 file changed, 161 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 234ad6f16e92..b6f8e3834bd3 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -80,23 +80,15 @@ enum { enum { MLX5_OBJ_TYPE_SW_ICM = 0x0008, - MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT = 0x23, -}; - -enum { - MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), - MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), - MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13), - MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT = - (1ULL << MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT), - MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39), -}; - -enum { MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS = 0x001c, MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018, + MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT = 0x23, + MLX5_OBJ_TYPE_STC = 0x0040, + MLX5_OBJ_TYPE_RTC = 0x0041, + MLX5_OBJ_TYPE_STE = 0x0042, + MLX5_OBJ_TYPE_MODIFY_HDR_PATTERN = 0x0043, MLX5_OBJ_TYPE_PAGE_TRACK = 0x46, MLX5_OBJ_TYPE_MKEY = 0xff01, MLX5_OBJ_TYPE_QP = 0xff02, @@ -112,6 +104,16 @@ enum { MLX5_OBJ_TYPE_RQT = 0xff0e, MLX5_OBJ_TYPE_FLOW_COUNTER = 0xff0f, MLX5_OBJ_TYPE_CQ = 0xff10, + MLX5_OBJ_TYPE_FT_ALIAS = 0xff15, +}; + +enum { + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), + MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13), + MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT = + (1ULL << MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT), + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39), }; enum { @@ -313,6 +315,7 @@ 
enum { MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, + MLX5_CMD_OP_GENERATE_WQE = 0xb17, MLX5_CMD_OP_MAX }; @@ -485,7 +488,13 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_66[0x2]; u8 reformat_add_macsec[0x1]; u8 reformat_remove_macsec[0x1]; - u8 reserved_at_6a[0xe]; + u8 reparse[0x1]; + u8 reserved_at_6b[0x1]; + u8 cross_vhca_object[0x1]; + u8 reformat_l2_to_l3_audp_tunnel[0x1]; + u8 reformat_l3_audp_tunnel_to_l2[0x1]; + u8 ignore_flow_level_rtc_valid[0x1]; + u8 reserved_at_70[0x8]; u8 log_max_ft_num[0x8]; u8 reserved_at_80[0x10]; @@ -522,7 +531,15 @@ struct mlx5_ifc_ipv6_layout_bits { u8 ipv6[16][0x8]; }; +struct mlx5_ifc_ipv6_simple_layout_bits { + u8 ipv6_127_96[0x20]; + u8 ipv6_95_64[0x20]; + u8 ipv6_63_32[0x20]; + u8 ipv6_31_0[0x20]; +}; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { + struct mlx5_ifc_ipv6_simple_layout_bits ipv6_simple_layout; struct mlx5_ifc_ipv6_layout_bits ipv6_layout; struct mlx5_ifc_ipv4_layout_bits ipv4_layout; u8 reserved_at_0[0x80]; @@ -911,7 +928,9 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_8[0x5]; u8 fdb_uplink_hairpin[0x1]; u8 fdb_multi_path_any_table_limit_regc[0x1]; - u8 reserved_at_f[0x3]; + u8 reserved_at_f[0x1]; + u8 fdb_dynamic_tunnel[0x1]; + u8 reserved_at_11[0x1]; u8 fdb_multi_path_any_table[0x1]; u8 reserved_at_13[0x2]; u8 fdb_modify_header_fwd_to_table[0x1]; @@ -950,6 +969,73 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_1900[0x6700]; }; +struct mlx5_ifc_wqe_based_flow_table_cap_bits { + u8 reserved_at_0[0x3]; + u8 log_max_num_ste[0x5]; + u8 reserved_at_8[0x3]; + u8 log_max_num_stc[0x5]; + u8 reserved_at_10[0x3]; + u8 log_max_num_rtc[0x5]; + u8 reserved_at_18[0x3]; + u8 log_max_num_header_modify_pattern[0x5]; + + u8 rtc_hash_split_table[0x1]; + u8 rtc_linear_lookup_table[0x1]; + u8 reserved_at_22[0x1]; + u8 stc_alloc_log_granularity[0x5]; + u8 reserved_at_28[0x3]; + u8 stc_alloc_log_max[0x5]; + u8 reserved_at_30[0x3]; + u8 ste_alloc_log_granularity[0x5]; + u8 reserved_at_38[0x3]; + u8 ste_alloc_log_max[0x5]; + + u8 reserved_at_40[0xb]; + u8 rtc_reparse_mode[0x5]; + u8 reserved_at_50[0x3]; + u8 rtc_index_mode[0x5]; + u8 reserved_at_58[0x3]; + u8 rtc_log_depth_max[0x5]; + + u8 reserved_at_60[0x10]; + u8 ste_format[0x10]; + + u8 stc_action_type[0x80]; + + u8 header_insert_type[0x10]; + u8 header_remove_type[0x10]; + + u8 trivial_match_definer[0x20]; + + u8 reserved_at_140[0x1b]; + u8 rtc_max_num_hash_definer_gen_wqe[0x5]; + + u8 reserved_at_160[0x18]; + u8 access_index_mode[0x8]; + + u8 reserved_at_180[0x10]; + u8 ste_format_gen_wqe[0x10]; + + u8 linear_match_definer_reg_c3[0x20]; + + u8 fdb_jump_to_tir_stc[0x1]; + u8 reserved_at_1c1[0x1f]; +}; + +struct mlx5_ifc_esw_cap_bits { + u8 reserved_at_0[0x1d]; + u8 merged_eswitch[0x1]; + u8 reserved_at_1e[0x2]; + + u8 reserved_at_20[0x40]; + + u8 esw_manager_vport_number_valid[0x1]; + u8 reserved_at_61[0xf]; + u8 esw_manager_vport_number[0x10]; + + u8 reserved_at_80[0x780]; +}; + enum { MLX5_COUNTER_SOURCE_ESWITCH = 0x0, MLX5_COUNTER_FLOW_ESWITCH = 0x1, @@ -1443,9 +1529,13 @@ enum { }; enum { + MLX5_FLEX_IPV4_OVER_VXLAN_ENABLED = 1 << 0, + MLX5_FLEX_IPV6_OVER_VXLAN_ENABLED = 1 << 1, + MLX5_FLEX_IPV6_OVER_IP_ENABLED = 1 << 2, MLX5_FLEX_PARSER_GENEVE_ENABLED = 1 << 3, MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED = 1 << 4, MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED = 1 << 5, + MLX5_FLEX_P_BIT_VXLAN_GPE_ENABLED = 1 << 6, MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED = 1 << 7, 
MLX5_FLEX_PARSER_ICMP_V4_ENABLED = 1 << 8, MLX5_FLEX_PARSER_ICMP_V6_ENABLED = 1 << 9, @@ -1650,7 +1740,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pci_sync_for_fw_update_event[0x1]; u8 reserved_at_1f2[0x6]; u8 init2_lag_tx_port_affinity[0x1]; - u8 reserved_at_1fa[0x3]; + u8 reserved_at_1fa[0x2]; + u8 wqe_based_flow_table_update_cap[0x1]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; @@ -1959,7 +2050,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_760[0x3]; u8 log_max_num_header_modify_argument[0x5]; - u8 reserved_at_768[0x4]; + u8 log_header_modify_argument_granularity_offset[0x4]; u8 log_header_modify_argument_granularity[0x4]; u8 reserved_at_770[0x3]; u8 log_header_modify_argument_max_alloc[0x5]; @@ -2006,7 +2097,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_140[0x60]; u8 flow_table_type_2_type[0x8]; - u8 reserved_at_1a8[0x3]; + u8 reserved_at_1a8[0x2]; + u8 format_select_dw_8_6_ext[0x1]; u8 log_min_mkey_entity_size[0x5]; u8 reserved_at_1b0[0x10]; @@ -2022,6 +2114,16 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_250[0x10]; u8 reserved_at_260[0x120]; + + u8 format_select_dw_gtpu_dw_0[0x8]; + u8 format_select_dw_gtpu_dw_1[0x8]; + u8 format_select_dw_gtpu_dw_2[0x8]; + u8 format_select_dw_gtpu_first_ext_dw_0[0x8]; + + u8 generate_wqe_type[0x20]; + + u8 reserved_at_2c0[0xc0]; + u8 reserved_at_380[0xb]; u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; @@ -2037,9 +2139,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_400[0x1]; u8 min_mkey_log_entity_size_fixed_buffer_valid[0x1]; - u8 reserved_at_402[0x1e]; + u8 reserved_at_402[0xe]; + u8 return_reg_id[0x10]; - u8 reserved_at_420[0x20]; + u8 reserved_at_420[0x1c]; + u8 flow_table_hash_type[0x4]; u8 reserved_at_440[0x8]; u8 max_num_eqs_24b[0x18]; @@ -2086,7 +2190,7 @@ struct mlx5_ifc_extended_dest_format_bits { u8 reserved_at_60[0x20]; }; -union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits { +union mlx5_ifc_dest_format_flow_counter_list_auto_bits { struct mlx5_ifc_extended_dest_format_bits extended_dest_format; struct mlx5_ifc_flow_counter_list_bits flow_counter_list; }; @@ -2178,7 +2282,10 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_139[0x4]; u8 log_wqe_stride_size[0x3]; - u8 reserved_at_140[0x80]; + u8 dbr_umem_id[0x20]; + u8 wq_umem_id[0x20]; + + u8 wq_umem_offset[0x40]; u8 headers_mkey[0x20]; @@ -3562,6 +3669,8 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; + struct mlx5_ifc_wqe_based_flow_table_cap_bits wqe_based_flow_table_cap; + struct mlx5_ifc_esw_cap_bits esw_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_port_selection_cap_bits port_selection_cap; struct mlx5_ifc_qos_cap_bits qos_cap; @@ -3678,7 +3787,7 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_1300[0x500]; - union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[]; + union mlx5_ifc_dest_format_flow_counter_list_auto_bits destination[]; }; enum { @@ -3919,7 +4028,8 @@ struct mlx5_ifc_sqc_bits { u8 reg_umr[0x1]; u8 allow_swp[0x1]; u8 hairpin[0x1]; - u8 reserved_at_f[0xb]; + u8 non_wire[0x1]; + u8 reserved_at_10[0xa]; u8 ts_format[0x2]; u8 reserved_at_1c[0x4]; @@ -4961,6 +5071,16 @@ struct mlx5_ifc_set_fte_in_bits { struct mlx5_ifc_flow_context_bits flow_context; }; +struct mlx5_ifc_dest_format_bits { + u8 destination_type[0x8]; + u8 
destination_id[0x18]; + + u8 destination_eswitch_owner_vhca_id_valid[0x1]; + u8 packet_reformat[0x1]; + u8 reserved_at_22[0xe]; + u8 destination_eswitch_owner_vhca_id[0x10]; +}; + struct mlx5_ifc_rts2rts_qp_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6127,7 +6247,8 @@ struct mlx5_ifc_flow_table_context_bits { u8 termination_table[0x1]; u8 table_miss_action[0x4]; u8 level[0x8]; - u8 reserved_at_10[0x8]; + u8 rtc_valid[0x1]; + u8 reserved_at_11[0x7]; u8 log_size[0x8]; u8 reserved_at_20[0x8]; @@ -6137,11 +6258,21 @@ struct mlx5_ifc_flow_table_context_bits { u8 lag_master_next_table_id[0x18]; u8 reserved_at_60[0x60]; + union { + struct { + u8 sw_owner_icm_root_1[0x40]; + + u8 sw_owner_icm_root_0[0x40]; + } sws; + struct { + u8 rtc_id_0[0x20]; - u8 sw_owner_icm_root_1[0x40]; + u8 rtc_id_1[0x20]; - u8 sw_owner_icm_root_0[0x40]; + u8 reserved_at_100[0x40]; + } hws; + }; }; struct mlx5_ifc_query_flow_table_out_bits { @@ -8923,7 +9054,9 @@ struct mlx5_ifc_create_qp_in_bits { struct mlx5_ifc_qpc_bits qpc; - u8 reserved_at_800[0x60]; + u8 wq_umem_offset[0x40]; + + u8 wq_umem_id[0x20]; u8 wq_umem_valid[0x1]; u8 reserved_at_861[0x1f]; -- cgit v1.2.3 From 00b9f0daefd7670356e592d418c890acf9179c94 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 17 Jan 2023 17:14:51 +0200 Subject: net/mlx5: Added missing definitions in preparation for HW Steering As part of preparation for HWS, added missing definitions in qp.h and fs_core.h: - FS_FT_FDB_RX/TX table types that are used by HWS in addition to an existing FS_FT_FDB - MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE that is used by HWS to require fence in WQE Reviewed-by: Hamdan Agbariya Signed-off-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index ad1ce650146c..fc7eeff99a8a 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -149,6 +149,7 @@ enum { MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, MLX5_WQE_CTRL_CQ_UPDATE_AND_EQE = 3 << 2, MLX5_WQE_CTRL_SOLICITED = 1 << 1, + MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE = 1 << 5, }; enum { -- cgit v1.2.3 From 452ef7f86036392005940de54228d42ca0044192 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Mon, 5 Aug 2024 10:03:20 +0300 Subject: net/mlx5: Add missing masks and QoS bit masks for scheduling elements Add the missing masks for supported element types and Transmit Scheduling Arbiter (TSAR) types in scheduling elements. Also, add the corresponding bit masks for these types in the QoS capabilities of a NIC scheduler. 
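A hypothetical capability check built on the new fields (the helper name is illustrative, and MLX5_CAP_QOS() is assumed to be the existing accessor for qos_cap fields):

	/* sketch: only create a DWRR TSAR on the NIC scheduler if advertised */
	static bool example_nic_dwrr_tsar_supported(struct mlx5_core_dev *mdev)
	{
		return MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR;
	}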
Fixes: 214baf22870c ("net/mlx5e: Support HTB offload") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cab228cf51c6..cfdf984a95a8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1027,7 +1027,8 @@ struct mlx5_ifc_qos_cap_bits { u8 max_tsar_bw_share[0x20]; - u8 reserved_at_100[0x20]; + u8 nic_element_type[0x10]; + u8 nic_tsar_type[0x10]; u8 reserved_at_120[0x3]; u8 log_meter_aso_granularity[0x5]; @@ -3966,6 +3967,7 @@ enum { ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1, ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, + ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, }; struct mlx5_ifc_scheduling_context_bits { @@ -4675,6 +4677,12 @@ enum { TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, }; +enum { + TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, + TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, + TSAR_TYPE_CAP_MASK_ETS = 1 << 2, +}; + struct mlx5_ifc_tsar_element_bits { u8 reserved_at_0[0x8]; u8 tsar_type[0x8]; -- cgit v1.2.3 From 080d72f471c86f8906845bc822051f5790d0a90d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 4 Sep 2024 17:47:43 +0200 Subject: libeth: add Tx buffer completion helpers Software-side Tx buffers for storing DMA, frame size, skb pointers etc. are pretty much generic and every driver defines them the same way. The same can be said for software Tx completions -- same napi_consume_skb()s and all that... Add a couple simple wrappers for doing that to stop repeating the old tale at least within the Intel code. Drivers are free to use 'priv' member at the end of the structure. 
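An illustrative driver-side completion loop (the ring structure and its fields are hypothetical; only struct libeth_sqe, struct libeth_cq_pp, struct libeth_sq_napi_stats and libeth_tx_complete() come from this patch):

	static void example_clean_tx(struct example_txq *txq, bool napi)
	{
		struct libeth_sq_napi_stats ss = { };
		struct libeth_cq_pp cp = {
			.dev	= txq->dev,
			.ss	= &ss,
			.napi	= napi,
		};
		u32 i = txq->next_to_clean;

		/* let libeth unmap/free each completed SQE and fill the stats */
		while (i != txq->next_to_use) {
			libeth_tx_complete(&txq->sqes[i], &cp);
			if (++i == txq->desc_count)
				i = 0;
		}

		txq->next_to_clean = i;
	}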
Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/tx.h | 129 +++++++++++++++++++++++++++++++++++++++++++++ include/net/libeth/types.h | 25 +++++++++ 2 files changed, 154 insertions(+) create mode 100644 include/net/libeth/tx.h create mode 100644 include/net/libeth/types.h (limited to 'include') diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h new file mode 100644 index 000000000000..35614f9523f6 --- /dev/null +++ b/include/net/libeth/tx.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_TX_H +#define __LIBETH_TX_H + +#include + +#include + +/* Tx buffer completion */ + +/** + * enum libeth_sqe_type - type of &libeth_sqe to act on Tx completion + * @LIBETH_SQE_EMPTY: unused/empty, no action required + * @LIBETH_SQE_CTX: context descriptor with empty SQE, no action required + * @LIBETH_SQE_SLAB: kmalloc-allocated buffer, unmap and kfree() + * @LIBETH_SQE_FRAG: mapped skb frag, only unmap DMA + * @LIBETH_SQE_SKB: &sk_buff, unmap and napi_consume_skb(), update stats + */ +enum libeth_sqe_type { + LIBETH_SQE_EMPTY = 0U, + LIBETH_SQE_CTX, + LIBETH_SQE_SLAB, + LIBETH_SQE_FRAG, + LIBETH_SQE_SKB, +}; + +/** + * struct libeth_sqe - represents a Send Queue Element / Tx buffer + * @type: type of the buffer, see the enum above + * @rs_idx: index of the last buffer from the batch this one was sent in + * @raw: slab buffer to free via kfree() + * @skb: &sk_buff to consume + * @dma: DMA address to unmap + * @len: length of the mapped region to unmap + * @nr_frags: number of frags in the frame this buffer belongs to + * @packets: number of physical packets sent for this frame + * @bytes: number of physical bytes sent for this frame + * @priv: driver-private scratchpad + */ +struct libeth_sqe { + enum libeth_sqe_type type:32; + u32 rs_idx; + + union { + void *raw; + struct sk_buff *skb; + }; + + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + + u32 nr_frags; + u32 packets; + u32 bytes; + + unsigned long priv; +} __aligned_largest; + +/** + * LIBETH_SQE_CHECK_PRIV - check the driver's private SQE data + * @p: type or name of the object the driver wants to fit into &libeth_sqe + * + * Make sure the driver's private data fits into libeth_sqe::priv. To be used + * right after its declaration. + */ +#define LIBETH_SQE_CHECK_PRIV(p) \ + static_assert(sizeof(p) <= sizeof_field(struct libeth_sqe, priv)) + +/** + * struct libeth_cq_pp - completion queue poll params + * @dev: &device to perform DMA unmapping + * @ss: onstack NAPI stats to fill + * @napi: whether it's called from the NAPI context + * + * libeth uses this structure to access objects needed for performing full + * Tx complete operation without passing lots of arguments and change the + * prototypes each time a new one is added. + */ +struct libeth_cq_pp { + struct device *dev; + struct libeth_sq_napi_stats *ss; + + bool napi; +}; + +/** + * libeth_tx_complete - perform Tx completion for one SQE + * @sqe: SQE to complete + * @cp: poll params + * + * Do Tx complete for all the types of buffers, incl. freeing, unmapping, + * updating the stats etc. 
+ */ +static inline void libeth_tx_complete(struct libeth_sqe *sqe, + const struct libeth_cq_pp *cp) +{ + switch (sqe->type) { + case LIBETH_SQE_EMPTY: + return; + case LIBETH_SQE_SKB: + case LIBETH_SQE_FRAG: + case LIBETH_SQE_SLAB: + dma_unmap_page(cp->dev, dma_unmap_addr(sqe, dma), + dma_unmap_len(sqe, len), DMA_TO_DEVICE); + break; + default: + break; + } + + switch (sqe->type) { + case LIBETH_SQE_SKB: + cp->ss->packets += sqe->packets; + cp->ss->bytes += sqe->bytes; + + napi_consume_skb(sqe->skb, cp->napi); + break; + case LIBETH_SQE_SLAB: + kfree(sqe->raw); + break; + default: + break; + } + + sqe->type = LIBETH_SQE_EMPTY; +} + +#endif /* __LIBETH_TX_H */ diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h new file mode 100644 index 000000000000..603825e45133 --- /dev/null +++ b/include/net/libeth/types.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_TYPES_H +#define __LIBETH_TYPES_H + +#include + +/** + * struct libeth_sq_napi_stats - "hot" counters to update in Tx completion loop + * @packets: completed frames counter + * @bytes: sum of bytes of completed frames above + * @raw: alias to access all the fields as an array + */ +struct libeth_sq_napi_stats { + union { + struct { + u32 packets; + u32 bytes; + }; + DECLARE_FLEX_ARRAY(u32, raw); + }; +}; + +#endif /* __LIBETH_TYPES_H */ -- cgit v1.2.3 From 3dc95a3edd0a86b4a59670b3fafcc64c7d83e2e7 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 4 Sep 2024 17:47:45 +0200 Subject: netdevice: add netdev_tx_reset_subqueue() shorthand Add a shorthand similar to other net*_subqueue() helpers for resetting the queue by its index w/o obtaining &netdev_tx_queue beforehand manually. Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/linux/netdevice.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b47c00657bd0..44d1dbb54ffe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3568,6 +3568,17 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) #endif } +/** + * netdev_tx_reset_subqueue - reset the BQL stats and state of a netdev queue + * @dev: network device + * @qid: stack index of the queue to reset + */ +static inline void netdev_tx_reset_subqueue(const struct net_device *dev, + u32 qid) +{ + netdev_tx_reset_queue(netdev_get_tx_queue(dev, qid)); +} + /** * netdev_reset_queue - reset the packets and bytes count of a network device * @dev_queue: network device @@ -3577,7 +3588,7 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) */ static inline void netdev_reset_queue(struct net_device *dev_queue) { - netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); + netdev_tx_reset_subqueue(dev_queue, 0); } /** -- cgit v1.2.3 From 40a895fd9a358eea16901026360bd2cd3ca691a7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 5 Sep 2024 15:35:45 -0700 Subject: cxl: move cxl headers to new include/cxl/ directory Group all cxl related kernel headers into include/cxl/ directory. 
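Consumers switch include paths accordingly, e.g. (illustrative):

	-#include <linux/cxl-event.h>
	-#include <linux/einj-cxl.h>
	+#include <cxl/event.h>
	+#include <cxl/einj.h>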
Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20240905223711.1990186-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/cxl/einj.h | 44 ++++++++++++ include/cxl/event.h | 175 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/cxl-event.h | 175 ---------------------------------------------- include/linux/einj-cxl.h | 44 ------------ 4 files changed, 219 insertions(+), 219 deletions(-) create mode 100644 include/cxl/einj.h create mode 100644 include/cxl/event.h delete mode 100644 include/linux/cxl-event.h delete mode 100644 include/linux/einj-cxl.h (limited to 'include') diff --git a/include/cxl/einj.h b/include/cxl/einj.h new file mode 100644 index 000000000000..624ff6ff41f9 --- /dev/null +++ b/include/cxl/einj.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CXL protocol Error INJection support. + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Ben Cheatham + */ +#ifndef EINJ_CXL_H +#define EINJ_CXL_H + +#include +#include + +struct pci_dev; +struct seq_file; + +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) +int einj_cxl_available_error_type_show(struct seq_file *m, void *v); +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); +int einj_cxl_inject_rch_error(u64 rcrb, u64 type); +bool einj_cxl_is_initialized(void); +#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */ +static inline int einj_cxl_available_error_type_show(struct seq_file *m, + void *v) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) +{ + return -ENXIO; +} + +static inline bool einj_cxl_is_initialized(void) { return false; } +#endif /* CONFIG_ACPI_APEI_EINJ_CXL */ + +#endif /* EINJ_CXL_H */ diff --git a/include/cxl/event.h b/include/cxl/event.h new file mode 100644 index 000000000000..0bea1afbd747 --- /dev/null +++ b/include/cxl/event.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Intel Corporation. 
*/ +#ifndef _LINUX_CXL_EVENT_H +#define _LINUX_CXL_EVENT_H + +#include +#include +#include + +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_hdr { + u8 length; + u8 flags[3]; + __le16 handle; + __le16 related_handle; + __le64 timestamp; + u8 maint_op_class; + u8 reserved[15]; +} __packed; + +struct cxl_event_media_hdr { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + /* + * The meaning of Validity Flags from bit 2 is + * different across DRAM and General Media records + */ + u8 validity_flags[2]; + u8 channel; + u8 rank; +} __packed; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +struct cxl_event_generic { + struct cxl_event_record_hdr hdr; + u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; +} __packed; + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +struct cxl_event_gen_media { + struct cxl_event_media_hdr media_hdr; + u8 device[3]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 reserved[46]; +} __packed; + +/* + * DRAM Event Record - DER + * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + */ +#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 +struct cxl_event_dram { + struct cxl_event_media_hdr media_hdr; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + u8 column[2]; + u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; + u8 reserved[0x17]; +} __packed; + +/* + * Get Health Info Record + * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +struct cxl_get_health_info { + u8 health_status; + u8 media_status; + u8 add_status; + u8 life_used; + u8 device_temp[2]; + u8 dirty_shutdown_cnt[4]; + u8 cor_vol_err_cnt[4]; + u8 cor_per_err_cnt[4]; +} __packed; + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +struct cxl_event_mem_module { + struct cxl_event_record_hdr hdr; + u8 event_type; + struct cxl_get_health_info info; + u8 reserved[0x3d]; +} __packed; + +union cxl_event { + struct cxl_event_generic generic; + struct cxl_event_gen_media gen_media; + struct cxl_event_dram dram; + struct cxl_event_mem_module mem_module; + /* dram & gen_media event header */ + struct cxl_event_media_hdr media_hdr; +} __packed; + +/* + * Common Event Record Format; in event logs + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_raw { + uuid_t id; + union cxl_event event; +} __packed; + +enum cxl_event_type { + CXL_CPER_EVENT_GENERIC, + CXL_CPER_EVENT_GEN_MEDIA, + CXL_CPER_EVENT_DRAM, + CXL_CPER_EVENT_MEM_MODULE, +}; + +#define CPER_CXL_DEVICE_ID_VALID BIT(0) +#define CPER_CXL_DEVICE_SN_VALID BIT(1) +#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) +struct cxl_cper_event_rec { + struct { + u32 length; + u64 validation_bits; + struct cper_cxl_event_devid { + u16 vendor_id; + u16 device_id; + u8 func_num; + u8 device_num; + u8 bus_num; + u16 segment_num; + u16 slot_num; /* bits 2:0 reserved */ + u8 reserved; + } __packed device_id; + struct cper_cxl_event_sn { + u32 lower_dw; + u32 upper_dw; + } __packed dev_serial_num; + } __packed hdr; + + union cxl_event event; +} __packed; + +struct cxl_cper_work_data { + enum cxl_event_type event_type; + struct cxl_cper_event_rec rec; +}; + +#ifdef CONFIG_ACPI_APEI_GHES +int cxl_cper_register_work(struct work_struct *work); +int cxl_cper_unregister_work(struct work_struct *work); +int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd); +#else +static inline int cxl_cper_register_work(struct 
work_struct *work) +{ + return 0; +} + +static inline int cxl_cper_unregister_work(struct work_struct *work) +{ + return 0; +} +static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) +{ + return 0; +} +#endif + +#endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h deleted file mode 100644 index 0bea1afbd747..000000000000 --- a/include/linux/cxl-event.h +++ /dev/null @@ -1,175 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2023 Intel Corporation. */ -#ifndef _LINUX_CXL_EVENT_H -#define _LINUX_CXL_EVENT_H - -#include -#include -#include - -/* - * Common Event Record Format - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 - */ -struct cxl_event_record_hdr { - u8 length; - u8 flags[3]; - __le16 handle; - __le16 related_handle; - __le64 timestamp; - u8 maint_op_class; - u8 reserved[15]; -} __packed; - -struct cxl_event_media_hdr { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - /* - * The meaning of Validity Flags from bit 2 is - * different across DRAM and General Media records - */ - u8 validity_flags[2]; - u8 channel; - u8 rank; -} __packed; - -#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 -struct cxl_event_generic { - struct cxl_event_record_hdr hdr; - u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; -} __packed; - -/* - * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 - */ -#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 -struct cxl_event_gen_media { - struct cxl_event_media_hdr media_hdr; - u8 device[3]; - u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; - u8 reserved[46]; -} __packed; - -/* - * DRAM Event Record - DER - * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 - */ -#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 -struct cxl_event_dram { - struct cxl_event_media_hdr media_hdr; - u8 nibble_mask[3]; - u8 bank_group; - u8 bank; - u8 row[3]; - u8 column[2]; - u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; - u8 reserved[0x17]; -} __packed; - -/* - * Get Health Info Record - * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 - */ -struct cxl_get_health_info { - u8 health_status; - u8 media_status; - u8 add_status; - u8 life_used; - u8 device_temp[2]; - u8 dirty_shutdown_cnt[4]; - u8 cor_vol_err_cnt[4]; - u8 cor_per_err_cnt[4]; -} __packed; - -/* - * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 - */ -struct cxl_event_mem_module { - struct cxl_event_record_hdr hdr; - u8 event_type; - struct cxl_get_health_info info; - u8 reserved[0x3d]; -} __packed; - -union cxl_event { - struct cxl_event_generic generic; - struct cxl_event_gen_media gen_media; - struct cxl_event_dram dram; - struct cxl_event_mem_module mem_module; - /* dram & gen_media event header */ - struct cxl_event_media_hdr media_hdr; -} __packed; - -/* - * Common Event Record Format; in event logs - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 - */ -struct cxl_event_record_raw { - uuid_t id; - union cxl_event event; -} __packed; - -enum cxl_event_type { - CXL_CPER_EVENT_GENERIC, - CXL_CPER_EVENT_GEN_MEDIA, - CXL_CPER_EVENT_DRAM, - CXL_CPER_EVENT_MEM_MODULE, -}; - -#define CPER_CXL_DEVICE_ID_VALID BIT(0) -#define CPER_CXL_DEVICE_SN_VALID BIT(1) -#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) -struct cxl_cper_event_rec { - struct { - u32 length; - u64 validation_bits; - struct cper_cxl_event_devid { - u16 vendor_id; - u16 device_id; - u8 func_num; - u8 device_num; - u8 bus_num; - u16 segment_num; - u16 slot_num; /* bits 2:0 reserved */ - u8 reserved; - } 
__packed device_id; - struct cper_cxl_event_sn { - u32 lower_dw; - u32 upper_dw; - } __packed dev_serial_num; - } __packed hdr; - - union cxl_event event; -} __packed; - -struct cxl_cper_work_data { - enum cxl_event_type event_type; - struct cxl_cper_event_rec rec; -}; - -#ifdef CONFIG_ACPI_APEI_GHES -int cxl_cper_register_work(struct work_struct *work); -int cxl_cper_unregister_work(struct work_struct *work); -int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd); -#else -static inline int cxl_cper_register_work(struct work_struct *work) -{ - return 0; -} - -static inline int cxl_cper_unregister_work(struct work_struct *work) -{ - return 0; -} -static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) -{ - return 0; -} -#endif - -#endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h deleted file mode 100644 index 624ff6ff41f9..000000000000 --- a/include/linux/einj-cxl.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL protocol Error INJection support. - * - * Copyright (c) 2023 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Author: Ben Cheatham - */ -#ifndef EINJ_CXL_H -#define EINJ_CXL_H - -#include -#include - -struct pci_dev; -struct seq_file; - -#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) -int einj_cxl_available_error_type_show(struct seq_file *m, void *v); -int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); -int einj_cxl_inject_rch_error(u64 rcrb, u64 type); -bool einj_cxl_is_initialized(void); -#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */ -static inline int einj_cxl_available_error_type_show(struct seq_file *m, - void *v) -{ - return -ENXIO; -} - -static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) -{ - return -ENXIO; -} - -static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) -{ - return -ENXIO; -} - -static inline bool einj_cxl_is_initialized(void) { return false; } -#endif /* CONFIG_ACPI_APEI_EINJ_CXL */ - -#endif /* EINJ_CXL_H */ -- cgit v1.2.3 From eb3b3f520518003cd363239fc160bdd7ed327319 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 10 Sep 2024 00:31:49 +0200 Subject: dt-bindings: clock, reset: fix top-comment indentation rk3576 headers Block comments should align the * on each line, as checkpatch rightfully pointed out, so fix that style issue on the newly added rk3576 headers. Fixes: 49c04453db81 ("dt-bindings: clock, reset: Add support for rk3576") Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240909223149.85364-1-heiko@sntech.de Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/rockchip,rk3576-cru.h | 12 ++++++------ include/dt-bindings/reset/rockchip,rk3576-cru.h | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/rockchip,rk3576-cru.h b/include/dt-bindings/clock/rockchip,rk3576-cru.h index a2933021be89..25aed298ac2c 100644 --- a/include/dt-bindings/clock/rockchip,rk3576-cru.h +++ b/include/dt-bindings/clock/rockchip,rk3576-cru.h @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* -* Copyright (c) 2023 Rockchip Electronics Co. Ltd. -* Copyright (c) 2024 Collabora Ltd. -* -* Author: Elaine Zhang -* Author: Detlev Casanova -*/ + * Copyright (c) 2023 Rockchip Electronics Co. Ltd. + * Copyright (c) 2024 Collabora Ltd. 
+ * + * Author: Elaine Zhang + * Author: Detlev Casanova + */ #ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK3576_H #define _DT_BINDINGS_CLK_ROCKCHIP_RK3576_H diff --git a/include/dt-bindings/reset/rockchip,rk3576-cru.h b/include/dt-bindings/reset/rockchip,rk3576-cru.h index 291fec0ecba0..ae856906f3a3 100644 --- a/include/dt-bindings/reset/rockchip,rk3576-cru.h +++ b/include/dt-bindings/reset/rockchip,rk3576-cru.h @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* -* Copyright (c) 2023 Rockchip Electronics Co. Ltd. -* Copyright (c) 2024 Collabora Ltd. -* -* Author: Elaine Zhang -* Author: Detlev Casanova -*/ + * Copyright (c) 2023 Rockchip Electronics Co. Ltd. + * Copyright (c) 2024 Collabora Ltd. + * + * Author: Elaine Zhang + * Author: Detlev Casanova + */ #ifndef _DT_BINDINGS_RESET_ROCKCHIP_RK3576_H #define _DT_BINDINGS_RESET_ROCKCHIP_RK3576_H -- cgit v1.2.3 From 246d3aa3e53151fa150f10257ddd8a4facd31a6a Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 8 Aug 2024 12:18:46 +0100 Subject: mm: cleanup count_mthp_stat() definition Patch series "Shmem mTHP controls and stats improvements", v3. This is a small series to tidy up the way the shmem controls and stats are exposed. These patches were previously part of the series at [2], but I decided to split them out since they can go in independently. This patch (of 2): Let's move count_mthp_stat() so that it's always defined, even when THP is disabled. Previously uses of the function in files such as shmem.c, which are compiled even when THP is disabled, required ugly THP ifdeferry. With this cleanup, we can remove those ifdefs and the function resolves to a nop when THP is disabled. I shortly plan to call count_mthp_stat() from more THP-invariant source files. Link: https://lkml.kernel.org/r/20240808111849.651867-1-ryan.roberts@arm.com Link: https://lkml.kernel.org/r/20240808111849.651867-2-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Acked-by: Barry Song Reviewed-by: Baolin Wang Reviewed-by: Lance Yang Acked-by: David Hildenbrand Cc: Gavin Shan Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 70 ++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 6370026689e0..4c32058cacfe 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -114,6 +114,41 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) #define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) +enum mthp_stat_item { + MTHP_STAT_ANON_FAULT_ALLOC, + MTHP_STAT_ANON_FAULT_FALLBACK, + MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, + MTHP_STAT_SWPOUT, + MTHP_STAT_SWPOUT_FALLBACK, + MTHP_STAT_SHMEM_ALLOC, + MTHP_STAT_SHMEM_FALLBACK, + MTHP_STAT_SHMEM_FALLBACK_CHARGE, + MTHP_STAT_SPLIT, + MTHP_STAT_SPLIT_FAILED, + MTHP_STAT_SPLIT_DEFERRED, + __MTHP_STAT_COUNT +}; + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) +struct mthp_stat { + unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT]; +}; + +DECLARE_PER_CPU(struct mthp_stat, mthp_stats); + +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ + if (order <= 0 || order > PMD_ORDER) + return; + + this_cpu_inc(mthp_stats.stats[order][item]); +} +#else +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ +} +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern unsigned long transparent_hugepage_flags; @@ 
-269,41 +304,6 @@ struct thpsize { #define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj) -enum mthp_stat_item { - MTHP_STAT_ANON_FAULT_ALLOC, - MTHP_STAT_ANON_FAULT_FALLBACK, - MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, - MTHP_STAT_SWPOUT, - MTHP_STAT_SWPOUT_FALLBACK, - MTHP_STAT_SHMEM_ALLOC, - MTHP_STAT_SHMEM_FALLBACK, - MTHP_STAT_SHMEM_FALLBACK_CHARGE, - MTHP_STAT_SPLIT, - MTHP_STAT_SPLIT_FAILED, - MTHP_STAT_SPLIT_DEFERRED, - __MTHP_STAT_COUNT -}; - -struct mthp_stat { - unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT]; -}; - -#ifdef CONFIG_SYSFS -DECLARE_PER_CPU(struct mthp_stat, mthp_stats); - -static inline void count_mthp_stat(int order, enum mthp_stat_item item) -{ - if (order <= 0 || order > PMD_ORDER) - return; - - this_cpu_inc(mthp_stats.stats[order][item]); -} -#else -static inline void count_mthp_stat(int order, enum mthp_stat_item item) -{ -} -#endif - #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1< Date: Sat, 24 Aug 2024 13:04:40 +1200 Subject: mm: count the number of anonymous THPs per size Patch series "mm: count the number of anonymous THPs per size", v4. Knowing the number of transparent anon THPs in the system is crucial for performance analysis. It helps in understanding the ratio and distribution of THPs versus small folios throughout the system. Additionally, partial unmapping by userspace can lead to significant waste of THPs over time and increase memory reclamation pressure. We need this information for comprehensive system tuning. This patch (of 2): Let's track for each anonymous THP size, how many of them are currently allocated. We'll track the complete lifespan of an anon THP, starting when it becomes an anon THP ("large anon folio") (->mapping gets set), until it gets freed (->mapping gets cleared). Introduce a new "nr_anon" counter per THP size and adjust the corresponding counter in the following cases: * We allocate a new THP and call folio_add_new_anon_rmap() to map it the first time and turn it into an anon THP. * We split an anon THP into multiple smaller ones. * We migrate an anon THP, when we prepare the destination. * We free an anon THP back to the buddy. Note that AnonPages in /proc/meminfo currently tracks the total number of *mapped* anonymous *pages*, and therefore has slightly different semantics. In the future, we might also want to track "nr_anon_mapped" for each THP size, which might be helpful when comparing it to the number of allocated anon THPs (long-term pinning, stuck in swapcache, memory leaks, ...). Further note that for now, we only track anon THPs after they got their ->mapping set, for example via folio_add_new_anon_rmap(). If we would allocate some in the swapcache, they will only show up in the statistics for now after they have been mapped to user space the first time, where we call folio_add_new_anon_rmap(). 
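A simplified sketch of the bookkeeping described above (the call sites live in mm/ and are not part of this include/ hunk; the placement is illustrative):

	/* folio becomes an anon THP, e.g. via folio_add_new_anon_rmap() */
	mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);

	/* folio stops being one: split, or freed back to the buddy */
	mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, -1);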
[akpm@linux-foundation.org: documentation fixups, per David] Link: https://lkml.kernel.org/r/3e8add35-e26b-443b-8a04-1078f4bc78f6@redhat.com Link: https://lkml.kernel.org/r/20240824010441.21308-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240824010441.21308-2-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Chris Li Cc: Chuanhua Han Cc: Kairui Song Cc: Kalesh Singh Cc: Lance Yang Cc: Ryan Roberts Cc: Shuai Yuan Cc: Usama Arif Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4c32058cacfe..2ee2971e4e10 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -126,6 +126,7 @@ enum mthp_stat_item { MTHP_STAT_SPLIT, MTHP_STAT_SPLIT_FAILED, MTHP_STAT_SPLIT_DEFERRED, + MTHP_STAT_NR_ANON, __MTHP_STAT_COUNT }; @@ -136,14 +137,24 @@ struct mthp_stat { DECLARE_PER_CPU(struct mthp_stat, mthp_stats); -static inline void count_mthp_stat(int order, enum mthp_stat_item item) +static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta) { if (order <= 0 || order > PMD_ORDER) return; - this_cpu_inc(mthp_stats.stats[order][item]); + this_cpu_add(mthp_stats.stats[order][item], delta); +} + +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ + mod_mthp_stat(order, item, 1); } + #else +static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta) +{ +} + static inline void count_mthp_stat(int order, enum mthp_stat_item item) { } -- cgit v1.2.3 From 8175ebfd302abe6fbdca9037f763ecbfdb8db572 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 24 Aug 2024 13:04:41 +1200 Subject: mm: count the number of partially mapped anonymous THPs per size When a THP is added to the deferred_list due to partially mapped, its partial pages are unused, leading to wasted memory and potentially increasing memory reclamation pressure. Detailing the specifics of how unmapping occurs is quite difficult and not that useful, so we adopt a simple approach: each time a THP enters the deferred_list, we increment the count by 1; whenever it leaves for any reason, we decrement the count by 1. Link: https://lkml.kernel.org/r/20240824010441.21308-3-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Chris Li Cc: Chuanhua Han Cc: Kairui Song Cc: Kalesh Singh Cc: Lance Yang Cc: Ryan Roberts Cc: Shuai Yuan Cc: Usama Arif Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 2ee2971e4e10..4902e2f7e896 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -127,6 +127,7 @@ enum mthp_stat_item { MTHP_STAT_SPLIT_FAILED, MTHP_STAT_SPLIT_DEFERRED, MTHP_STAT_NR_ANON, + MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, __MTHP_STAT_COUNT }; -- cgit v1.2.3 From b7315fbb64736acad3cd33851884b222b00dc0f4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 25 Aug 2024 21:23:20 -0700 Subject: mm/damon/core: introduce per-context region priorities histogram buffer Patch series "replace per-quota region priorities histogram buffer with per-context one". Each DAMOS quota (struct damos_quota) maintains a histogram for total regions size per its prioritization score. 
DAMOS calculates the minimum prioritization score of regions that are ok to apply the DAMOS action to while respecting the quota. The histogram is constructed only for the calculation of the minimum score in damos_adjust_quota(), which is called by kdamond_fn() for each quota. Hence, there is no real reason to have per-quota histogram. Only per-kdamond histogram is needed, since parallel kdamonds could have races otherwise. The current implementation is only wasting memory, and can easily cause unintended stack usage[1]. So, introducing a per-kdamond histogram and replacing the per-quota one with it would be the right solution for the issue. However, supporting multiple DAMON contexts per kdamond is still an ongoing work[2] without a clear estimated time of arrival. Meanwhile, per-context histogram could be an effective and straightforward solution having no blocker. Let's fix the problem first in this way. This patch (of 4): Introduce per-context buffer for region priority scores-total size histogram. Same as the per-quota one (->histogram of struct damos_quota), the new buffer is hidden from DAMON API users by being defined as a private field of DAMON context structure. It is dynamically allocated and de-allocated at the beginning and ending of the execution of the kdamond by kdamond_fn() itself. [1] commit 0742cadf5e4c ("mm/damon/lru_sort: adjust local variable to dynamic allocation") [2] https://lore.kernel.org/20240531122320.909060-1-yorha.op@gmail.com Link: https://lkml.kernel.org/r/20240826042323.87025-1-sj@kernel.org Link: https://lkml.kernel.org/r/20240826042323.87025-2-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 27c546bfc6d4..60cb8eada3d6 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -630,6 +630,8 @@ struct damon_ctx { unsigned long next_ops_update_sis; /* for waiting until the execution of the kdamond_fn is started */ struct completion kdamond_started; + /* for scheme quotas prioritization */ + unsigned long *regions_score_histogram; /* public: */ struct task_struct *kdamond; -- cgit v1.2.3 From e3bcb1672583f2e1cf456e4303ce562a3cc775c0 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 25 Aug 2024 21:23:22 -0700 Subject: mm/damon/core: remove per-scheme region priority histogram buffer Nobody is reading from or writing to the per-scheme region priorities histogram buffer. It is only wasting memory. Remove it.
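Taken together with the previous patch, the intended lifetime of the per-context buffer is roughly (a simplified sketch of what kdamond_fn() does; error handling omitted):

	ctx->regions_score_histogram = kmalloc_array(DAMOS_MAX_SCORE + 1,
			sizeof(*ctx->regions_score_histogram), GFP_KERNEL);
	/* ... kdamond main loop; damos_adjust_quota() uses the buffer ... */
	kfree(ctx->regions_score_histogram);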
Link: https://lkml.kernel.org/r/20240826042323.87025-4-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 60cb8eada3d6..a67f2c4940e9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -233,7 +233,6 @@ struct damos_quota { unsigned long charge_addr_from; /* For prioritization */ - unsigned long histogram[DAMOS_MAX_SCORE + 1]; unsigned int min_score; /* For feedback loop */ -- cgit v1.2.3 From 17d75422604f0b92869aa17cb44f60958212f033 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 31 Aug 2024 08:28:22 +1200 Subject: mm: document __GFP_NOFAIL must be blockable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Non-blocking allocation with __GFP_NOFAIL is not supported and may still result in NULL pointers (if we don't return NULL, we result in busy-loop within non-sleepable contexts): static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { ... /* * Make sure that __GFP_NOFAIL request doesn't leak out and make sure * we always retry */ if (gfp_mask & __GFP_NOFAIL) { /* * All existing users of the __GFP_NOFAIL are blockable, so warn * of any new users that actually require GFP_NOWAIT */ if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask)) goto fail; ... } ... fail: warn_alloc(gfp_mask, ac->nodemask, "page allocation failure: order:%u", order); got_pg: return page; } Highlight this in the documentation of __GFP_NOFAIL so that non-mm subsystems can reject any illegal usage of __GFP_NOFAIL with GFP_ATOMIC, GFP_NOWAIT, etc. Link: https://lkml.kernel.org/r/20240830202823.21478-3-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: Michal Hocko Reviewed-by: Christoph Hellwig Acked-by: Vlastimil Babka Acked-by: Davidlohr Bueso Acked-by: David Hildenbrand Cc: Christoph Lameter Cc: David Rientjes Cc: "Eugenio Pérez" Cc: Hailong.Liu Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Jason Wang Cc: Joonsoo Kim Cc: Kees Cook Cc: Linus Torvalds Cc: Lorenzo Stoakes Cc: Maxime Coquelin Cc: "Michael S. Tsirkin" Cc: Pekka Enberg Cc: Roman Gushchin Cc: Uladzislau Rezki (Sony) Cc: Xuan Zhuo Cc: Christoph Hellwig Cc: Xie Yongji Cc: Yafang Shao Signed-off-by: Andrew Morton --- include/linux/gfp_types.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 313be4ad79fd..4a1fa7706b0c 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -215,7 +215,8 @@ enum { * the caller still has to check for failures) while costly requests try to be * not disruptive and back off even without invoking the OOM killer. * The following three modifiers might be used to override some of these - * implicit rules. + * implicit rules. Please note that all of them must be used along with + * %__GFP_DIRECT_RECLAIM flag. * * %__GFP_NORETRY: The VM implementation will try only very lightweight * memory direct reclaim to get some memory under memory pressure (thus @@ -246,6 +247,8 @@ enum { * cannot handle allocation failures. The allocation could block * indefinitely but will never return with failure. Testing for * failure is pointless. + * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM. + * It should _never_ be used in non-sleepable contexts. 
* New users should be evaluated carefully (and the flag should be * used only when there is no reasonable failure policy) but it is * definitely preferable to use the flag rather than opencode endless -- cgit v1.2.3 From 903edea6c53f097f5f0c847fdbbfab0c6c44f241 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 31 Aug 2024 08:28:23 +1200 Subject: mm: warn about illegal __GFP_NOFAIL usage in a more appropriate location and manner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three points for this change: 1. We should consolidate all warnings in one place. Currently, the order > 1 warning is in the hotpath, while others are in less likely scenarios. Moving all warnings to the slowpath will reduce the overhead for order > 1 and increase the visibility of other warnings. 2. We currently have two warnings for order: one for order > 1 in the hotpath and another for order > costly_order in the laziest path. I suggest standardizing on order > 1 since it's been in use for a long time. 3. We don't need to check for __GFP_NOWARN in this case. __GFP_NOWARN is meant to suppress allocation failure reports, but here we're dealing with bug detection, not allocation failures. So replace WARN_ON_ONCE_GFP by WARN_ON_ONCE. [v-songbaohua@oppo.com: also update the doc for __GFP_NOFAIL with order > 1] Link: https://lkml.kernel.org/r/20240903223935.1697-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240830202823.21478-4-21cnbao@gmail.com Signed-off-by: Barry Song Suggested-by: Vlastimil Babka Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Christoph Lameter Cc: Davidlohr Bueso Cc: David Rientjes Cc: "Eugenio Pérez" Cc: Hailong.Liu Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Jason Wang Cc: Joonsoo Kim Cc: Kees Cook Cc: Linus Torvalds Cc: Lorenzo Stoakes Cc: Maxime Coquelin Cc: "Michael S. Tsirkin" Cc: Pekka Enberg Cc: Roman Gushchin Cc: Uladzislau Rezki (Sony) Cc: Xie Yongji Cc: Xuan Zhuo Cc: Yafang Shao Signed-off-by: Andrew Morton --- include/linux/gfp_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 4a1fa7706b0c..65db9349f905 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -253,7 +253,8 @@ enum { * used only when there is no reasonable failure policy) but it is * definitely preferable to use the flag rather than opencode endless * loop around allocator. - * Using this flag for costly allocations is _highly_ discouraged. + * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is + * not supported. Please consider using kvmalloc() instead. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) -- cgit v1.2.3 From b1f202060afeb7fcb98473929d26fd3d2093b067 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Aug 2024 11:03:36 +0100 Subject: mm: remap unused subpages to shared zeropage when splitting isolated thp Patch series "mm: split underused THPs", v5. The current upstream default policy for THP is always. However, Meta uses madvise in production as the current THP=always policy vastly overprovisions THPs in sparsely accessed memory areas, resulting in excessive memory pressure and premature OOM killing. Using madvise + relying on khugepaged has certain drawbacks over THP=always. Using madvise hints mean THPs aren't "transparent" and require userspace changes. 
Waiting for khugepaged to scan memory and collapse pages into THP can be slow and unpredictable in terms of performance (i.e. you don't know when the collapse will happen), while production environments require predictable performance. If there is enough memory available, it's better for both performance and predictability to have a THP from fault time, i.e. THP=always rather than wait for khugepaged to collapse it, and deal with sparsely populated THPs when the system is running out of memory.

This patch series is an attempt to mitigate the issue of running out of memory when THP is always enabled. During runtime whenever a THP is being faulted in or collapsed by khugepaged, the THP is added to a list. Whenever memory reclaim happens, the kernel runs the deferred_split shrinker which goes through the list and checks if the THP was underused, i.e. how many of the base 4K pages of the entire THP were zero-filled. If this number goes above a certain threshold, the shrinker will attempt to split that THP. Then at remap time, the pages that were zero-filled are mapped to the shared zeropage, hence saving memory. This method avoids the downside of wasting memory in areas where THP is sparsely filled when THP is always enabled, while still providing the upside of THPs like reduced TLB misses without having to use madvise.

Meta production workloads that were CPU bound (>99% CPU utilization) were tested with THP shrinker. The results after 2 hours are as follows:

                        | THP=madvise | THP=always        | THP=always
                        |             | + shrinker series | + max_ptes_none=409
-----------------------------------------------------------------------------
Performance improvement |      -      |       +1.8%       |       +1.7%
(over THP=madvise)      |             |                   |
-----------------------------------------------------------------------------
Memory usage            |    54.6G    |   58.8G (+7.7%)   |   55.9G (+2.4%)
-----------------------------------------------------------------------------

max_ptes_none=409 means that any THP that has more than 409 out of 512 (80%) zero-filled pages will be split. To test out the patches, the below commands without the shrinker will invoke OOM killer immediately and kill stress, but will not fail with the shrinker:

echo 450 > /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none
mkdir /sys/fs/cgroup/test
echo $$ > /sys/fs/cgroup/test/cgroup.procs
echo 20M > /sys/fs/cgroup/test/memory.max
echo 0 > /sys/fs/cgroup/test/memory.swap.max
# allocate twice memory.max for each stress worker and touch 40/512 of
# each THP, i.e. vm-stride 50K.
# With the shrinker, max_ptes_none of 470 and below won't invoke OOM
# killer.
# Without the shrinker, OOM killer is invoked immediately irrespective
# of max_ptes_none value and kills stress.
stress --vm 1 --vm-bytes 40M --vm-stride 50K

This patch (of 5):

Here being unused means containing only zeros and inaccessible to userspace. When splitting an isolated thp under reclaim or migration, the unused subpages can be mapped to the shared zeropage, hence saving memory. This is particularly helpful when the internal fragmentation of a thp is high, i.e. it has many untouched subpages. This is also a prerequisite for the THP low utilization shrinker which will be introduced in later patches, where underutilized THPs are split, and the zero-filled pages are freed, saving memory.
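A rough sketch of the core idea at remap time (illustrative only; the helper below is hypothetical and is not the function added by this patch): each still-unmapped subpage is scanned, and only pages that actually contain data get a real PTE, while fully zero pages can be pointed at the shared zeropage instead.

#include <linux/highmem.h>
#include <linux/string.h>

/* Hypothetical helper: "unused" == entirely zero-filled. */
static bool subpage_is_unused(struct page *page)
{
	void *addr = kmap_local_page(page);
	/* memchr_inv() returns NULL when every byte matches, i.e. all zero. */
	bool unused = !memchr_inv(addr, 0, PAGE_SIZE);

	kunmap_local(addr);
	return unused;
}

Subpages for which such a check succeeds can be remapped read-only to the shared zeropage during remove_migration_ptes(), which is what the RMP_USE_SHARED_ZEROPAGE flag added in the rmap.h hunk below requests.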
Link: https://lkml.kernel.org/r/20240830100438.3623486-1-usamaarif642@gmail.com Link: https://lkml.kernel.org/r/20240830100438.3623486-3-usamaarif642@gmail.com Signed-off-by: Yu Zhao Signed-off-by: Usama Arif Tested-by: Shuang Zhai Cc: Alexander Zhu Cc: Barry Song Cc: David Hildenbrand Cc: Domenico Cerasuolo Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kairui Song Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Nico Pache Cc: Rik van Riel Cc: Roman Gushchin Cc: Ryan Roberts Cc: Shakeel Butt Cc: Shuang Zhai Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/rmap.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 91b5935e8485..d5e93e44322e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -745,7 +745,12 @@ int folio_mkclean(struct folio *); int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, struct vm_area_struct *vma); -void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); +enum rmp_flags { + RMP_LOCKED = 1 << 0, + RMP_USE_SHARED_ZEROPAGE = 1 << 1, +}; + +void remove_migration_ptes(struct folio *src, struct folio *dst, int flags); /* * rmap_walk_control: To control rmap traversing for specific needs -- cgit v1.2.3 From 8422acdc97ed5839692b45f800dbfb78abe65a94 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 30 Aug 2024 11:03:38 +0100 Subject: mm: introduce a pageflag for partially mapped folios Currently folio->_deferred_list is used to keep track of partially_mapped folios that are going to be split under memory pressure. In the next patch, all THPs that are faulted in and collapsed by khugepaged are also going to be tracked using _deferred_list. This patch introduces a pageflag to be able to distinguish between partially mapped folios and others in the deferred_list at split time in deferred_split_scan. It's needed as __folio_remove_rmap decrements _mapcount, _large_mapcount and _entire_mapcount, hence it won't be possible to distinguish between partially mapped folios and others in deferred_split_scan. Even though it introduces an extra flag to track if the folio is partially mapped, there is no functional change intended with this patch and the flag is not useful in this patch itself; it will become useful in the next patch when _deferred_list has non partially mapped folios.
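As a rough illustration of what the new flag enables (a sketch under the assumption that the FOLIO_TEST_FLAG()/__FOLIO_SET_FLAG() declarations in the page-flags.h hunk below generate the usual folio_test_partially_mapped()-style helpers; folio_is_underused() is a hypothetical predicate standing in for the check added later in the series):

/* Illustrative only -- not code from this patch. */
static bool deferred_entry_wants_split(struct folio *folio)
{
	/* Partially mapped folios are split to free their unmapped tail. */
	if (folio_test_partially_mapped(folio))
		return true;

	/* Fully mapped THPs are split only when they are mostly zero-filled. */
	return folio_is_underused(folio);
}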
Link: https://lkml.kernel.org/r/20240830100438.3623486-5-usamaarif642@gmail.com Signed-off-by: Usama Arif Cc: Alexander Zhu Cc: Barry Song Cc: David Hildenbrand Cc: Domenico Cerasuolo Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kairui Song Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Nico Pache Cc: Rik van Riel Cc: Roman Gushchin Cc: Ryan Roberts Cc: Shakeel Butt Cc: Shuang Zhai Cc: Yu Zhao Cc: Shuang Zhai Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 ++-- include/linux/page-flags.h | 13 ++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4902e2f7e896..e3896ebf0f94 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -333,7 +333,7 @@ static inline int split_huge_page(struct page *page) { return split_huge_page_to_list_to_order(page, NULL, 0); } -void deferred_split_folio(struct folio *folio); +void deferred_split_folio(struct folio *folio, bool partially_mapped); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio); @@ -507,7 +507,7 @@ static inline int split_huge_page(struct page *page) { return 0; } -static inline void deferred_split_folio(struct folio *folio) {} +static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2175ebceb41c..1b3a76710487 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -186,6 +186,7 @@ enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ + PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) @@ -859,8 +860,18 @@ static inline void ClearPageCompound(struct page *page) ClearPageHead(page); } FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE) +FOLIO_TEST_FLAG(partially_mapped, FOLIO_SECOND_PAGE) +/* + * PG_partially_mapped is protected by deferred_split split_queue_lock, + * so its safe to use non-atomic set/clear. + */ +__FOLIO_SET_FLAG(partially_mapped, FOLIO_SECOND_PAGE) +__FOLIO_CLEAR_FLAG(partially_mapped, FOLIO_SECOND_PAGE) #else FOLIO_FLAG_FALSE(large_rmappable) +FOLIO_TEST_FLAG_FALSE(partially_mapped) +__FOLIO_SET_FLAG_NOOP(partially_mapped) +__FOLIO_CLEAR_FLAG_NOOP(partially_mapped) #endif #define PG_head_mask ((1UL << PG_head)) @@ -1171,7 +1182,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable | 1UL << PG_partially_mapped) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) -- cgit v1.2.3 From dafff3f4c850c98e15501bc6ee20581f9a013dcc Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 30 Aug 2024 11:03:39 +0100 Subject: mm: split underused THPs This is an attempt to mitigate the issue of running out of memory when THP is always enabled. During runtime whenever a THP is being faulted in (__do_huge_pmd_anonymous_page) or collapsed by khugepaged (collapse_huge_page), the THP is added to _deferred_list. Whenever memory reclaim happens in linux, the kernel runs the deferred_split shrinker which goes through the _deferred_list. 
If the folio was partially mapped, the shrinker attempts to split it. If the folio is not partially mapped, the shrinker checks if the THP was underused, i.e. how many of the base 4K pages of the entire THP were zero-filled. If this number goes above a certain threshold (decided by /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none), the shrinker will attempt to split that THP. Then at remap time, the pages that were zero-filled are mapped to the shared zeropage, hence saving memory. Link: https://lkml.kernel.org/r/20240830100438.3623486-6-usamaarif642@gmail.com Signed-off-by: Usama Arif Suggested-by: Rik van Riel Co-authored-by: Johannes Weiner Cc: Alexander Zhu Cc: Barry Song Cc: David Hildenbrand Cc: Domenico Cerasuolo Cc: Jonathan Corbet Cc: Kairui Song Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Nico Pache Cc: Roman Gushchin Cc: Ryan Roberts Cc: Shakeel Butt Cc: Shuang Zhai Cc: Yu Zhao Cc: Shuang Zhai Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/khugepaged.h | 1 + include/linux/vm_event_item.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index f68865e19b0b..30baae91b225 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -4,6 +4,7 @@ #include /* MMF_VM_HUGEPAGE */ +extern unsigned int khugepaged_max_ptes_none __read_mostly; #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index aae5c7c5cfb4..aed952d04132 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -105,6 +105,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, + THP_UNDERUSED_SPLIT_PAGE, THP_SPLIT_PMD, THP_SCAN_EXCEED_NONE_PTE, THP_SCAN_EXCEED_SWAP_PTE, -- cgit v1.2.3 From 0a6fff20d36bc81156a49649f622b152330fed3c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Sep 2024 16:24:59 -0400 Subject: mm: fix folio_alloc_noprof() folio_alloc_noprof) wasn't calling the _noprof version, causing allocations to be accounted here instead of to the caller Link: https://lkml.kernel.org/r/20240901202459.4867-1-kent.overstreet@linux.dev Signed-off-by: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 03ba9563c6db..a951de920e20 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -319,7 +319,7 @@ static inline struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order } static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { - return __folio_alloc_node(gfp, order, numa_node_id()); + return __folio_alloc_node_noprof(gfp, order, numa_node_id()); } static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid) -- cgit v1.2.3 From 6050df6d706f58b2240bfabece9f406170104d57 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 9 Aug 2024 02:01:15 +0000 Subject: maple_tree: fix comment typo on ma_flag of allocation tree The maple tree flag of allocation tree is MT_FLAGS_ALLOC_RANGE. Just correct it. Link: https://lkml.kernel.org/r/20240809020115.31575-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 8e1504a81cd2..c2c11004085e 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -52,9 +52,9 @@ * bit in the node type. This is possible by using bit 1 to indicate if bit 2 * is part of the type or the slot. * - * Once the type is decided, the decision of an allocation range type or a range - * type is done by examining the immutable tree flag for the MAPLE_ALLOC_RANGE - * flag. + * Once the type is decided, the decision of an allocation range type or a + * range type is done by examining the immutable tree flag for the + * MT_FLAGS_ALLOC_RANGE flag. * * Node types: * 0x??1 = Root -- cgit v1.2.3 From 4fc4187984e5dbd7ad63c3acf0b228fa9bf298d3 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 2 Sep 2024 19:55:49 +0900 Subject: lib: zstd: export API needed for dictionary support Patch series "zram: introduce custom comp backends API", v7. This series introduces support for run-time compression algorithms tuning, so users, for instance, can adjust compression/acceleration levels and provide pre-trained compression/decompression dictionaries which certain algorithms support. At this point we stop supporting (old/deprecated) comp API. We may add new acomp API support in the future, but before that zram needs to undergo some major rework (we are not ready for async compression). Some benchmarks for reference (look at column #2) *** init zstd /sys/block/zram0/mm_stat 1750659072 504622188 514355200 0 514355200 1 0 34204 34204 *** init zstd dict=/home/ss/zstd-dict-amd64 /sys/block/zram0/mm_stat 1750650880 465908890 475398144 0 475398144 1 0 34185 34185 *** init zstd level=8 dict=/home/ss/zstd-dict-amd64 /sys/block/zram0/mm_stat 1750654976 430803319 439873536 0 439873536 1 0 34185 34185 *** init lz4 /sys/block/zram0/mm_stat 1750646784 664266564 677060608 0 677060608 1 0 34288 34288 *** init lz4 dict=/home/ss/lz4-dict-amd64 /sys/block/zram0/mm_stat 1750650880 619990300 632102912 0 632102912 1 0 34278 34278 *** init lz4hc /sys/block/zram0/mm_stat 1750630400 609023822 621232128 0 621232128 1 0 34288 34288 *** init lz4hc dict=/home/ss/lz4-dict-amd64 /sys/block/zram0/mm_stat 1750659072 505133172 515231744 0 515231744 1 0 34278 34278 Recompress init zram zstd (prio=0), zstd level=5 (prio 1), zstd with dict (prio 2) *** zstd /sys/block/zram0/mm_stat 1750982656 504630584 514269184 0 514269184 1 0 34204 34204 *** idle recompress priority=1 (zstd level=5) /sys/block/zram0/mm_stat 1750982656 488645601 525438976 0 514269184 1 0 34204 34204 *** idle recompress priority=2 (zstd dict) /sys/block/zram0/mm_stat 1750982656 460869640 517914624 0 514269184 1 0 34185 34204 This patch (of 24): We need to export a number of API functions that enable advanced zstd usage - C/D dictionaries, dictionaries sharing between contexts, etc. 
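To make the intended use of the calls declared in the hunk below concrete, here is a hedged sketch of a caller (not code from this series; the zstd_custom_mem field names follow upstream zstd's ZSTD_customMem and error handling is abbreviated):

#include <linux/slab.h>
#include <linux/zstd.h>

static void *zstd_kvzalloc(void *opaque, size_t size)
{
	return kvzalloc(size, GFP_KERNEL);
}

static void zstd_kvfree(void *opaque, void *address)
{
	kvfree(address);
}

static size_t compress_with_dict(const void *dict, size_t dict_len,
				 const void *src, size_t src_len,
				 void *dst, size_t dst_cap)
{
	zstd_custom_mem mem = {
		.customAlloc = zstd_kvzalloc,
		.customFree  = zstd_kvfree,
	};
	zstd_compression_parameters cparams =
		zstd_get_cparams(zstd_default_clevel(), src_len, dict_len);
	zstd_cdict *cdict = zstd_create_cdict_byreference(dict, dict_len,
							  cparams, mem);
	zstd_cctx *cctx = zstd_create_cctx_advanced(mem);
	size_t ret = 0;	/* sketch: treat 0 as "setup failed" */

	if (cdict && cctx)
		ret = zstd_compress_using_cdict(cctx, dst, dst_cap,
						src, src_len, cdict);
	if (cctx)
		zstd_free_cctx(cctx);
	if (cdict)
		zstd_free_cdict(cdict);
	return ret;	/* otherwise check with zstd_is_error() */
}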
Link: https://lkml.kernel.org/r/20240902105656.1383858-1-senozhatsky@chromium.org Link: https://lkml.kernel.org/r/20240902105656.1383858-2-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Nick Terrell Cc: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/zstd.h | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) (limited to 'include') diff --git a/include/linux/zstd.h b/include/linux/zstd.h index 113408eef6ec..b2c7cf310c8f 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -77,6 +77,30 @@ int zstd_min_clevel(void); */ int zstd_max_clevel(void); +/** + * zstd_default_clevel() - default compression level + * + * Return: Default compression level. + */ +int zstd_default_clevel(void); + +/** + * struct zstd_custom_mem - custom memory allocation + */ +typedef ZSTD_customMem zstd_custom_mem; + +/** + * struct zstd_dict_load_method - Dictionary load method. + * See zstd_lib.h. + */ +typedef ZSTD_dictLoadMethod_e zstd_dict_load_method; + +/** + * struct zstd_dict_content_type - Dictionary context type. + * See zstd_lib.h. + */ +typedef ZSTD_dictContentType_e zstd_dict_content_type; + /* ====== Parameter Selection ====== */ /** @@ -136,6 +160,19 @@ typedef ZSTD_parameters zstd_parameters; zstd_parameters zstd_get_params(int level, unsigned long long estimated_src_size); + +/** + * zstd_get_cparams() - returns zstd_compression_parameters for selected level + * @level: The compression level + * @estimated_src_size: The estimated source size to compress or 0 + * if unknown. + * @dict_size: Dictionary size. + * + * Return: The selected zstd_compression_parameters. + */ +zstd_compression_parameters zstd_get_cparams(int level, + unsigned long long estimated_src_size, size_t dict_size); + /* ====== Single-pass Compression ====== */ typedef ZSTD_CCtx zstd_cctx; @@ -180,6 +217,71 @@ zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size); size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, const void *src, size_t src_size, const zstd_parameters *parameters); +/** + * zstd_create_cctx_advanced() - Create compression context + * @custom_mem: Custom allocator. + * + * Return: NULL on error, pointer to compression context otherwise. + */ +zstd_cctx *zstd_create_cctx_advanced(zstd_custom_mem custom_mem); + +/** + * zstd_free_cctx() - Free compression context + * @cdict: Pointer to compression context. + * + * Return: Always 0. + */ +size_t zstd_free_cctx(zstd_cctx* cctx); + +/** + * struct zstd_cdict - Compression dictionary. + * See zstd_lib.h. + */ +typedef ZSTD_CDict zstd_cdict; + +/** + * zstd_create_cdict_byreference() - Create compression dictionary + * @dict: Pointer to dictionary buffer. + * @dict_size: Size of the dictionary buffer. + * @dict_load_method: Dictionary load method. + * @dict_content_type: Dictionary content type. + * @custom_mem: Memory allocator. + * + * Note, this uses @dict by reference (ZSTD_dlm_byRef), so it should be + * free before zstd_cdict is destroyed. + * + * Return: NULL on error, pointer to compression dictionary + * otherwise. + */ +zstd_cdict *zstd_create_cdict_byreference(const void *dict, size_t dict_size, + zstd_compression_parameters cparams, + zstd_custom_mem custom_mem); + +/** + * zstd_free_cdict() - Free compression dictionary + * @cdict: Pointer to compression dictionary. + * + * Return: Always 0. 
+ */ +size_t zstd_free_cdict(zstd_cdict* cdict); + +/** + * zstd_compress_using_cdict() - compress src into dst using a dictionary + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @dst: The buffer to compress src into. + * @dst_capacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @src_size: The size of the data to compress. + * @cdict: The dictionary to be used. + * + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_compress_using_cdict(zstd_cctx *cctx, void *dst, + size_t dst_capacity, const void *src, size_t src_size, + const zstd_cdict *cdict); + /* ====== Single-pass Decompression ====== */ typedef ZSTD_DCtx zstd_dctx; @@ -220,6 +322,71 @@ zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size); size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, const void *src, size_t src_size); +/** + * struct zstd_ddict - Decompression dictionary. + * See zstd_lib.h. + */ +typedef ZSTD_DDict zstd_ddict; + +/** + * zstd_create_ddict_byreference() - Create decompression dictionary + * @dict: Pointer to dictionary buffer. + * @dict_size: Size of the dictionary buffer. + * @dict_load_method: Dictionary load method. + * @dict_content_type: Dictionary content type. + * @custom_mem: Memory allocator. + * + * Note, this uses @dict by reference (ZSTD_dlm_byRef), so it should be + * free before zstd_ddict is destroyed. + * + * Return: NULL on error, pointer to decompression dictionary + * otherwise. + */ +zstd_ddict *zstd_create_ddict_byreference(const void *dict, size_t dict_size, + zstd_custom_mem custom_mem); +/** + * zstd_free_ddict() - Free decompression dictionary + * @dict: Pointer to the dictionary. + * + * Return: Always 0. + */ +size_t zstd_free_ddict(zstd_ddict *ddict); + +/** + * zstd_create_dctx_advanced() - Create decompression context + * @custom_mem: Custom allocator. + * + * Return: NULL on error, pointer to decompression context otherwise. + */ +zstd_dctx *zstd_create_dctx_advanced(zstd_custom_mem custom_mem); + +/** + * zstd_free_dctx() -- Free decompression context + * @dctx: Pointer to decompression context. + * Return: Always 0. + */ +size_t zstd_free_dctx(zstd_dctx *dctx); + +/** + * zstd_decompress_using_ddict() - decompress src into dst using a dictionary + * @dctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dst_capacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @src_size: The exact size of the data to decompress. + * @ddict: The dictionary to be used. + * + * Return: The decompressed size or an error, which can be checked using + * zstd_is_error(). 
+ */ +size_t zstd_decompress_using_ddict(zstd_dctx *dctx, + void *dst, size_t dst_capacity, const void *src, size_t src_size, + const zstd_ddict *ddict); + + /* ====== Streaming Buffers ====== */ /** -- cgit v1.2.3 From e1e4cfd01a6e75dd4c810aeac115340805cf63ff Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 3 Sep 2024 11:19:28 -0400 Subject: mm,tmpfs: consider end of file write in shmem_is_huge Take the end of a file write into consideration when deciding whether or not to use huge pages for tmpfs files when the tmpfs filesystem is mounted with huge=within_size This allows large writes that append to the end of a file to automatically use large pages. Doing 4MB sequential writes without fallocate to a 16GB tmpfs file with fio. The numbers without THP or with huge=always stay the same, but the performance with huge=within_size now matches that of huge=always. huge before after 4kB pages 1560 MB/s 1560 MB/s within_size 1560 MB/s 4720 MB/s always: 4720 MB/s 4720 MB/s [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20240903111928.7171e60c@imladris.surriel.com Signed-off-by: Rik van Riel Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Darrick J. Wong Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 1564d7d3ca61..515a9a6a3c6f 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -113,11 +113,11 @@ int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool shmem_huge_force); + loff_t write_end, bool shmem_huge_force); #else static inline unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool shmem_huge_force) + loff_t write_end, bool shmem_huge_force) { return 0; } @@ -143,8 +143,8 @@ enum sgp_type { SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ }; -int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, - enum sgp_type sgp); +int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end, + struct folio **foliop, enum sgp_type sgp); struct folio *shmem_read_folio_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp); -- cgit v1.2.3 From 25d4054cc97484f2555709ac233f955f674e026a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Sep 2024 17:57:59 +0100 Subject: mm: make arch_get_unmapped_area() take vm_flags by default Patch series "mm: Care about shadow stack guard gap when getting an unmapped area", v2. As covered in the commit log for c44357c2e76b ("x86/mm: care about shadow stack guard gap during placement") our current mmap() implementation does not take care to ensure that a new mapping isn't placed with existing mappings inside it's own guard gaps. This is particularly important for shadow stacks since if two shadow stacks end up getting placed adjacent to each other then they can overflow into each other which weakens the protection offered by the feature. On x86 there is a custom arch_get_unmapped_area() which was updated by the above commit to cover this case by specifying a start_gap for allocations with VM_SHADOW_STACK. 
Both arm64 and RISC-V have equivalent features and use the generic implementation of arch_get_unmapped_area() so let's make the equivalent change there so they also don't get shadow stack pages placed without guard pages. The arm64 and RISC-V shadow stack implementations are currently on the list: https://lore.kernel.org/r/20240829-arm64-gcs-v12-0-42fec94743 https://lore.kernel.org/lkml/20240403234054.2020347-1-debug@rivosinc.com/ Given the addition of the use of vm_flags in the generic implementation we also simplify the set of possibilities that have to be dealt with in the core code by making arch_get_unmapped_area() take vm_flags as standard. This is a bit invasive since the prototype change touches quite a few architectures but since the parameter is ignored the change is straightforward, the simplification for the generic code seems worth it. This patch (of 3): When we introduced arch_get_unmapped_area_vmflags() in 961148704acd ("mm: introduce arch_get_unmapped_area_vmflags()") we did so as part of properly supporting guard pages for shadow stacks on x86_64, which uses a custom arch_get_unmapped_area(). Equivalent features are also present on both arm64 and RISC-V, both of which use the generic implementation of arch_get_unmapped_area() and will require equivalent modification there. Rather than continue to deal with having two versions of the functions let's bite the bullet and have all implementations of arch_get_unmapped_area() take vm_flags as a parameter. The new parameter is currently ignored by all implementations other than x86. The only caller that doesn't have a vm_flags available is mm_get_unmapped_area(), as for the x86 implementation and the wrapper used on other architectures this is modified to supply no flags. No functional changes. Link: https://lkml.kernel.org/r/20240904-mm-generic-shadow-stack-guard-v2-0-a46b8b6dc0ed@kernel.org Link: https://lkml.kernel.org/r/20240904-mm-generic-shadow-stack-guard-v2-1-a46b8b6dc0ed@kernel.org Signed-off-by: Mark Brown Acked-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Acked-by: Helge Deller [parisc] Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Dave Hansen Cc: David S. Miller Cc: "Edgecombe, Rick P" Cc: Gerald Schaefer Cc: Guo Ren Cc: Heiko Carstens Cc: "H. 
Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Bottomley Cc: John Paul Adrian Glaubitz Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sven Schnelle Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Vlastimil Babka Cc: WANG Xuerui Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/sched/mm.h | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 91546493c43d..c4d34abc45d4 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -179,27 +179,20 @@ static inline void mm_update_next_owner(struct mm_struct *mm) extern void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack); -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long + +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t vm_flags); +unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t); unsigned long mm_get_unmapped_area(struct mm_struct *mm, struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -unsigned long -arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags, vm_flags_t vm_flags); -unsigned long -arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags, vm_flags_t); - unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, struct file *filp, unsigned long addr, -- cgit v1.2.3 From 540e00a729dfbefe0aa0d64b6dac1d1b620a1787 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Sep 2024 17:58:00 +0100 Subject: mm: pass vm_flags to generic_get_unmapped_area() In preparation for using vm_flags to ensure guard pages for shadow stacks supply them as an argument to generic_get_unmapped_area(). The only user outside of the core code is the PowerPC book3s64 implementation which is trivially wrapping the generic implementation in the radix_enabled() case. No functional changes. Link: https://lkml.kernel.org/r/20240904-mm-generic-shadow-stack-guard-v2-2-a46b8b6dc0ed@kernel.org Signed-off-by: Mark Brown Acked-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Acked-by: Michael Ellerman Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Dave Hansen Cc: David S. Miller Cc: "Edgecombe, Rick P" Cc: Gerald Schaefer Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Bottomley Cc: John Paul Adrian Glaubitz Cc: Matt Turner Cc: Max Filippov Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sven Schnelle Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Vlastimil Babka Cc: WANG Xuerui Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/sched/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index c4d34abc45d4..07bb8d4181d7 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -204,11 +204,11 @@ unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, unsigned long generic_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags); + unsigned long flags, vm_flags_t vm_flags); unsigned long generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags); + unsigned long flags, vm_flags_t vm_flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) {} -- cgit v1.2.3 From 08e28de1160a712724268fd33d77b32f1bc84d1c Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 3 Sep 2024 09:36:28 +0200 Subject: uprobes: use vm_special_mapping close() functionality The following KASAN splat was shown: [ 44.505448] ================================================================== 20:37:27 [3421/145075] [ 44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8 [ 44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384 [ 44.505479] [ 44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496 [ 44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0) [ 44.505508] Call Trace: [ 44.505511] [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108 [ 44.505521] [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0 [ 44.505529] [<000b0324d2f5464c>] print_report+0x44/0x138 [ 44.505536] [<000b0324d1383192>] kasan_report+0xc2/0x140 [ 44.505543] [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8 [ 44.505550] [<000b0324d12c7978>] remove_vma+0x78/0x120 [ 44.505557] [<000b0324d128a2c6>] exit_mmap+0x326/0x750 [ 44.505563] [<000b0324d0ba655a>] __mmput+0x9a/0x370 [ 44.505570] [<000b0324d0bbfbe0>] exit_mm+0x240/0x340 [ 44.505575] [<000b0324d0bc0228>] do_exit+0x548/0xd70 [ 44.505580] [<000b0324d0bc1102>] do_group_exit+0x132/0x390 [ 44.505586] [<000b0324d0bc13b6>] __s390x_sys_exit_group+0x56/0x60 [ 44.505592] [<000b0324d0adcbd6>] do_syscall+0x2f6/0x430 [ 44.505599] [<000b0324d2f78434>] __do_syscall+0xa4/0x170 [ 44.505606] [<000b0324d2f9454c>] system_call+0x74/0x98 [ 44.505614] [ 44.505616] Allocated by task 1384: [ 44.505621] kasan_save_stack+0x40/0x70 [ 44.505630] kasan_save_track+0x28/0x40 [ 44.505636] __kasan_kmalloc+0xa0/0xc0 [ 44.505642] __create_xol_area+0xfa/0x410 [ 44.505648] get_xol_area+0xb0/0xf0 [ 44.505652] uprobe_notify_resume+0x27a/0x470 [ 44.505657] irqentry_exit_to_user_mode+0x15e/0x1d0 [ 44.505664] pgm_check_handler+0x122/0x170 [ 44.505670] [ 44.505672] Freed by task 1384: [ 44.505676] kasan_save_stack+0x40/0x70 [ 44.505682] kasan_save_track+0x28/0x40 [ 44.505687] kasan_save_free_info+0x4a/0x70 [ 44.505693] __kasan_slab_free+0x5a/0x70 [ 44.505698] kfree+0xe8/0x3f0 [ 44.505704] __mmput+0x20/0x370 [ 44.505709] exit_mm+0x240/0x340 [ 
44.505713] do_exit+0x548/0xd70 [ 44.505718] do_group_exit+0x132/0x390 [ 44.505722] __s390x_sys_exit_group+0x56/0x60 [ 44.505727] do_syscall+0x2f6/0x430 [ 44.505732] __do_syscall+0xa4/0x170 [ 44.505738] system_call+0x74/0x98 The problem is that uprobe_clear_state() kfree's struct xol_area, which contains struct vm_special_mapping *xol_mapping. This one is passed to _install_special_mapping() in xol_add_vma(). __mput reads: static inline void __mmput(struct mm_struct *mm) { VM_BUG_ON(atomic_read(&mm->mm_users)); uprobe_clear_state(mm); exit_aio(mm); ksm_exit(mm); khugepaged_exit(mm); /* must run before exit_mmap */ exit_mmap(mm); ... } So uprobe_clear_state() in the beginning free's the memory area containing the vm_special_mapping data, but exit_mmap() uses this address later via vma->vm_private_data (which was set in _install_special_mapping(). Fix this by moving uprobe_clear_state() to uprobes.c and use it as close() callback. [usama.anjum@collabora.com: remove unneeded condition] Link: https://lkml.kernel.org/r/20240906101825.177490-1-usama.anjum@collabora.com Link: https://lkml.kernel.org/r/20240903073629.2442754-1-svens@linux.ibm.com Fixes: 223febc6e557 ("mm: add optional close() to struct vm_special_mapping") Signed-off-by: Sven Schnelle Suggested-by: Linus Torvalds Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/uprobes.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b503fafb7fb3..493dc95d912c 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -126,7 +126,6 @@ extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); -extern void uprobe_clear_state(struct mm_struct *mm); extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); -- cgit v1.2.3 From 0e40cf2a8b2c847950e025d5aa594bd545118d26 Mon Sep 17 00:00:00 2001 From: Kinsey Ho Date: Thu, 5 Sep 2024 00:30:50 +0000 Subject: cgroup: clarify css sibling linkage is protected by cgroup_mutex or RCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Improve mem_cgroup_iter()", v4. Incremental cgroup iteration is being used again [1]. This patchset improves the reliability of mem_cgroup_iter(). It also improves simplicity and code readability. [1] https://lore.kernel.org/20240514202641.2821494-1-hannes@cmpxchg.org/ This patch (of 5): Explicitly document that css sibling/descendant linkage is protected by cgroup_mutex or RCU. Also, document in css_next_descendant_pre() and similar functions that it isn't necessary to hold a ref on @pos. The following changes in this patchset rely on this clarification for simplification in memcg iteration code. 
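For reference, the pattern this documentation change blesses looks roughly like the following (an illustrative sketch, not code from the patch): the walk itself is covered by RCU, no reference is held on the iteration position, and a css is pinned only while it is actually used.

#include <linux/cgroup.h>
#include <linux/rcupdate.h>

static int count_live_descendants(struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *pos;
	int n = 0;

	rcu_read_lock();
	css_for_each_descendant_pre(pos, root) {
		/* No ref is held on pos; grab one only for the access. */
		if (!css_tryget(pos))
			continue;
		n++;
		css_put(pos);
	}
	rcu_read_unlock();
	return n;
}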
Link: https://lkml.kernel.org/r/20240905003058.1859929-1-kinseyho@google.com Link: https://lkml.kernel.org/r/20240905003058.1859929-2-kinseyho@google.com Suggested-by: Yosry Ahmed Reviewed-by: Michal Koutný Signed-off-by: Kinsey Ho Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tejun Heo Cc: Zefan Li Cc: Hugh Dickins Cc: T.J. Mercier Signed-off-by: Andrew Morton --- include/linux/cgroup-defs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 7fc2d0195f56..ca7e912b8355 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -172,7 +172,11 @@ struct cgroup_subsys_state { /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; - /* siblings list anchored at the parent's ->children */ + /* + * siblings list anchored at the parent's ->children + * + * linkage is protected by cgroup_mutex or RCU + */ struct list_head sibling; struct list_head children; -- cgit v1.2.3 From ec0db74b4b1f249ffca4df450f54c17573114045 Mon Sep 17 00:00:00 2001 From: Kinsey Ho Date: Thu, 5 Sep 2024 00:30:53 +0000 Subject: mm: restart if multiple traversals raced MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if multiple reclaimers raced on the same position, the reclaimers which detect the race will still reclaim from the same memcg. Instead, the reclaimers which detect the race should move on to the next memcg in the hierarchy. So, in the case where multiple traversals race, jump back to the start of the mem_cgroup_iter() function to find the next memcg in the hierarchy to reclaim from. Link: https://lkml.kernel.org/r/20240905003058.1859929-5-kinseyho@google.com Reported-by: syzbot+e099d407346c45275ce9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/000000000000817cf10620e20d33@google.com/ Signed-off-by: Kinsey Ho Reviewed-by: T.J. 
Mercier Cc: Johannes Weiner Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tejun Heo Cc: Yosry Ahmed Cc: Zefan Li Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fe05fdb92779..2ef94c74847d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -57,7 +57,7 @@ enum memcg_memory_event { struct mem_cgroup_reclaim_cookie { pg_data_t *pgdat; - unsigned int generation; + int generation; }; #ifdef CONFIG_MEMCG @@ -78,7 +78,7 @@ struct lruvec_stats; struct mem_cgroup_reclaim_iter { struct mem_cgroup *position; /* scan generation, increased every round-trip */ - unsigned int generation; + atomic_t generation; }; /* -- cgit v1.2.3 From eebc0f48468e68b93393d11f75ad17385a9acca8 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 5 Sep 2024 22:21:06 -0600 Subject: mm/codetag: fix a typo Link: https://lkml.kernel.org/r/20240906042108.1150526-1-yuzhao@google.com Fixes: 22d407b164ff ("lib: add allocation tagging support for memory allocation profiling") Signed-off-by: Yu Zhao Acked-by: Suren Baghdasaryan Acked-by: Muchun Song Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 8c61ccd161ba..896491d9ebe8 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -70,7 +70,7 @@ static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct) /* * When percpu variables are required to be defined as weak, static percpu * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION). - * Instead we will accound all module allocations to a single counter. + * Instead we will account all module allocations to a single counter. */ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); -- cgit v1.2.3 From 95599ef684d01136a8b77c16a7c853496786e173 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 5 Sep 2024 22:21:07 -0600 Subject: mm/codetag: fix pgalloc_tag_split() The current assumption is that a large folio can only be split into order-0 folios. That is not the case for hugeTLB demotion, nor for THP split: see commit c010d47f107f ("mm: thp: split huge page to any lower order pages"). When a large folio is split into ones of a lower non-zero order, only the new head pages should be tagged. Tagging tail pages can cause imbalanced "calls" counters, since only head pages are untagged by pgalloc_tag_sub() and the "calls" counts on tail pages are leaked, e.g., # echo 2048kB >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote_size # echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages # time echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote # echo 0 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages # grep alloc_gigantic_folio /proc/allocinfo Before this patch: 0 549427200 mm/hugetlb.c:1549 func:alloc_gigantic_folio real 0m2.057s user 0m0.000s sys 0m2.051s After this patch: 0 0 mm/hugetlb.c:1549 func:alloc_gigantic_folio real 0m1.711s user 0m0.000s sys 0m1.704s Not tagging tail pages also improves the splitting time, e.g., by about 15% when demoting 1GB hugeTLB folios to 2MB ones, as shown above. 
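To put numbers on the hugeTLB demotion case above (assuming 4K base pages): a 1GB folio is order 18 and a 2MB folio is order 9, so one demotion produces 2^(18-9) = 512 new folios. With this fix, pgalloc_tag_split() steps through the split folio in strides of 1 << new_order and only tags the 511 new head pages (the original head keeps its reference), instead of touching all 2^18 - 1 = 262,143 tail pages as before, which is where both the leaked "calls" counts and the roughly 15% splitting-time saving come from.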
Link: https://lkml.kernel.org/r/20240906042108.1150526-2-yuzhao@google.com Fixes: be25d1d4e822 ("mm: create new codetag references during page splitting") Signed-off-by: Yu Zhao Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 30 ++++++++++++++++++++++++++++++ include/linux/pgalloc_tag.h | 31 ------------------------------- 2 files changed, 30 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b0ff06d18c71..6bb778cbaabf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4084,4 +4084,34 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma); int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); +#ifdef CONFIG_MEM_ALLOC_PROFILING +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) +{ + int i; + struct alloc_tag *tag; + unsigned int nr_pages = 1 << new_order; + + if (!mem_alloc_profiling_enabled()) + return; + + tag = pgalloc_tag_get(&folio->page); + if (!tag) + return; + + for (i = nr_pages; i < (1 << old_order); i += nr_pages) { + union codetag_ref *ref = get_page_tag_ref(folio_page(folio, i)); + + if (ref) { + /* Set new reference to point to the original tag */ + alloc_tag_ref_set(ref, tag); + put_page_tag_ref(ref); + } + } +} +#else /* !CONFIG_MEM_ALLOC_PROFILING */ +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) +{ +} +#endif /* CONFIG_MEM_ALLOC_PROFILING */ + #endif /* _LINUX_MM_H */ diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 207f0c83c8e9..59a3deb792a8 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -80,36 +80,6 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) } } -static inline void pgalloc_tag_split(struct page *page, unsigned int nr) -{ - int i; - struct page_ext *first_page_ext; - struct page_ext *page_ext; - union codetag_ref *ref; - struct alloc_tag *tag; - - if (!mem_alloc_profiling_enabled()) - return; - - first_page_ext = page_ext = page_ext_get(page); - if (unlikely(!page_ext)) - return; - - ref = codetag_ref_from_page_ext(page_ext); - if (!ref->ct) - goto out; - - tag = ct_to_alloc_tag(ref->ct); - page_ext = page_ext_next(page_ext); - for (i = 1; i < nr; i++) { - /* Set new reference to point to the original tag */ - alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag); - page_ext = page_ext_next(page_ext); - } -out: - page_ext_put(first_page_ext); -} - static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { struct alloc_tag *tag = NULL; @@ -142,7 +112,6 @@ static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {} static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} -- cgit v1.2.3 From e0a955bf7f61cb034d228736d81c1ab3a47a3dca Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 5 Sep 2024 22:21:08 -0600 Subject: mm/codetag: add pgalloc_tag_copy() Add pgalloc_tag_copy() to transfer the codetag from the old folio to the new one during migration. 
This makes original allocation sites persist cross migration rather than lump into the get_new_folio callbacks passed into migrate_pages(), e.g., compaction_alloc(): # echo 1 >/proc/sys/vm/compact_memory # grep compaction_alloc /proc/allocinfo Before this patch: 132968448 32463 mm/compaction.c:1880 func:compaction_alloc After this patch: 0 0 mm/compaction.c:1880 func:compaction_alloc Link: https://lkml.kernel.org/r/20240906042108.1150526-3-yuzhao@google.com Fixes: dcfe378c81f7 ("lib: introduce support for page allocation tagging") Signed-off-by: Yu Zhao Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 24 ++++++++++-------------- include/linux/mm.h | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 896491d9ebe8..1f0a9ff23a2c 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -137,7 +137,16 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref) {} /* Caller should verify both ref and tag to be valid */ static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) { + alloc_tag_add_check(ref, tag); + if (!ref || !tag) + return; + ref->ct = &tag->ct; +} + +static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) +{ + __alloc_tag_ref_set(ref, tag); /* * We need in increment the call counter every time we have a new * allocation or when we split a large allocation into smaller ones. @@ -147,22 +156,9 @@ static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag this_cpu_inc(tag->counters->calls); } -static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) -{ - alloc_tag_add_check(ref, tag); - if (!ref || !tag) - return; - - __alloc_tag_ref_set(ref, tag); -} - static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes) { - alloc_tag_add_check(ref, tag); - if (!ref || !tag) - return; - - __alloc_tag_ref_set(ref, tag); + alloc_tag_ref_set(ref, tag); this_cpu_add(tag->counters->bytes, bytes); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 6bb778cbaabf..39f6faace590 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4108,10 +4108,37 @@ static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new } } } + +static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) +{ + struct alloc_tag *tag; + union codetag_ref *ref; + + tag = pgalloc_tag_get(&old->page); + if (!tag) + return; + + ref = get_page_tag_ref(&new->page); + if (!ref) + return; + + /* Clear the old ref to the original allocation tag. */ + clear_page_tag_ref(&old->page); + /* Decrement the counters of the tag on get_new_folio. */ + alloc_tag_sub(ref, folio_nr_pages(new)); + + __alloc_tag_ref_set(ref, tag); + + put_page_tag_ref(ref); +} #else /* !CONFIG_MEM_ALLOC_PROFILING */ static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) { } + +static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) +{ +} #endif /* CONFIG_MEM_ALLOC_PROFILING */ #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 6462dd53a26088a433f90a5c15822196f201037c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Sep 2024 13:42:47 -1000 Subject: sched_ext: Compact struct bpf_iter_scx_dsq_kern struct scx_iter_scx_dsq is defined as 6 u64's and scx_dsq_iter_kern was using 5 of them. 
We want to add two more u64 fields but it's better if we do so while staying within scx_iter_scx_dsq to maintain binary compatibility. The way scx_iter_scx_dsq_kern is laid out is rather inefficient - the node field takes up three u64's but only one bit of the last u64 is used. Turn the bool into u32 flags and only use the lower 16 bits freeing up 48 bits - 16 bits for flags, 32 bits for a u32 - for use by struct bpf_iter_scx_dsq_kern. This allows moving the dsq_seq and flags fields of bpf_iter_scx_dsq_kern into the cursor field reducing the struct size by a full u64. No behavior changes intended. Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index dc646cca4fcf..1ddbde64a31b 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -119,9 +119,17 @@ enum scx_kf_mask { __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; +enum scx_dsq_lnode_flags { + SCX_DSQ_LNODE_ITER_CURSOR = 1 << 0, + + /* high 16 bits can be for iter cursor flags */ + __SCX_DSQ_LNODE_PRIV_SHIFT = 16, +}; + struct scx_dsq_list_node { struct list_head node; - bool is_bpf_iter_cursor; + u32 flags; + u32 priv; /* can be used by iter cursor */ }; /* -- cgit v1.2.3 From 17245a195df4c86b1fd38718d8cdc532c040c08e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Sep 2024 09:10:59 -0700 Subject: net: remove dev_pick_tx_cpu_id() dev_pick_tx_cpu_id() has been introduced with two users by commit a4ea8a3dacc3 ("net: Add generic ndo_select_queue functions"). The use in AF_PACKET has been removed in 2019 by commit b71b5837f871 ("packet: rework packet_pick_tx_queue() to use common code selection") The other user was a Netlogic XLP driver, removed in 2021 by commit 47ac6f567c28 ("staging: Remove Netlogic XLP network driver"). It's relatively unlikely that any modern driver will need an .ndo_select_queue implementation which picks purely based on CPU ID and skips XPS, delete dev_pick_tx_cpu_id() Found by code inspection. Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240906161059.715546-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b47c00657bd0..2e40a137dc12 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3085,8 +3085,6 @@ void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev); int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); -- cgit v1.2.3 From 038eb433dc1474c4bc7d33188294e3d4778efdfd Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 6 Sep 2024 17:54:34 -0400 Subject: dma-mapping: add tracing for dma-mapping API calls When debugging drivers, it can often be useful to trace when memory gets (un)mapped for DMA (and can be accessed by the device). Add some tracepoints for this purpose. Use u64 instead of phys_addr_t and dma_addr_t (and similarly %llx instead of %pa) because libtraceevent can't handle typedefs in all cases. 
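One way to consume the new events from userspace is via tracefs; a small hedged sketch (assuming tracefs is mounted at /sys/kernel/tracing, and that the event group is named after TRACE_SYSTEM, i.e. "dma"):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	/* Enable every event in the "dma" group added by this patch. */
	fd = open("/sys/kernel/tracing/events/dma/enable", O_WRONLY);
	if (fd < 0 || write(fd, "1", 1) != 1) {
		perror("enable dma trace events");
		return 1;
	}
	close(fd);

	/* Stream dma_map_page/dma_unmap_page/... records as they occur. */
	fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}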
Signed-off-by: Sean Anderson Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 341 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100644 include/trace/events/dma.h (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h new file mode 100644 index 000000000000..f57f05331d73 --- /dev/null +++ b/include/trace/events/dma.h @@ -0,0 +1,341 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dma + +#if !defined(_TRACE_DMA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DMA_H + +#include +#include +#include +#include + +TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL); +TRACE_DEFINE_ENUM(DMA_TO_DEVICE); +TRACE_DEFINE_ENUM(DMA_FROM_DEVICE); +TRACE_DEFINE_ENUM(DMA_NONE); + +#define decode_dma_data_direction(dir) \ + __print_symbolic(dir, \ + { DMA_BIDIRECTIONAL, "BIDIRECTIONAL" }, \ + { DMA_TO_DEVICE, "TO_DEVICE" }, \ + { DMA_FROM_DEVICE, "FROM_DEVICE" }, \ + { DMA_NONE, "NONE" }) + +#define decode_dma_attrs(attrs) \ + __print_flags(attrs, "|", \ + { DMA_ATTR_WEAK_ORDERING, "WEAK_ORDERING" }, \ + { DMA_ATTR_WRITE_COMBINE, "WRITE_COMBINE" }, \ + { DMA_ATTR_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING" }, \ + { DMA_ATTR_SKIP_CPU_SYNC, "SKIP_CPU_SYNC" }, \ + { DMA_ATTR_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS" }, \ + { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ + { DMA_ATTR_NO_WARN, "NO_WARN" }, \ + { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }) + +DECLARE_EVENT_CLASS(dma_map, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = phys_addr; + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + decode_dma_attrs(__entry->attrs)) +); + +DEFINE_EVENT(dma_map, dma_map_page, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); + +DEFINE_EVENT(dma_map, dma_map_resource, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); + +DECLARE_EVENT_CLASS(dma_unmap, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->addr = addr; + __entry->size = size; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->addr, + __entry->size, + decode_dma_attrs(__entry->attrs)) +); + +DEFINE_EVENT(dma_unmap, 
dma_unmap_page, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs)); + +DEFINE_EVENT(dma_unmap, dma_unmap_resource, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs)); + +TRACE_EVENT(dma_alloc, + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, + size_t size, gfp_t flags, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, flags, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(gfp_t, flags) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = virt_to_phys(virt_addr); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->flags = flags; + __entry->attrs = attrs; + ), + + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx flags=%s attrs=%s", + __get_str(device), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + show_gfp_flags(__entry->flags), + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_free, + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = virt_to_phys(virt_addr); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->attrs = attrs; + ), + + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", + __get_str(device), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_map_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + int ents, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sg, nents, ents, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, nents) + __dynamic_array(u64, dma_addrs, ents) + __dynamic_array(unsigned int, lengths, ents) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = + sg_phys(sg + i); + for (i = 0; i < ents; i++) { + ((u64 *)__get_dynamic_array(dma_addrs))[i] = + sg_dma_address(sg + i); + ((unsigned int *)__get_dynamic_array(lengths))[i] = + sg_dma_len(sg + i); + } + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(dma_addrs), + __get_dynamic_array_len(dma_addrs) / + sizeof(u64), sizeof(u64)), + __print_array(__get_dynamic_array(lengths), + __get_dynamic_array_len(lengths) / + sizeof(unsigned int), sizeof(unsigned int)), + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64)), + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_unmap_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sg, nents, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) 
+ __dynamic_array(u64, addrs, nents) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) + ((u64 *)__get_dynamic_array(addrs))[i] = + sg_phys(sg + i); + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s phys_addrs=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(addrs), + __get_dynamic_array_len(addrs) / + sizeof(u64), sizeof(u64)), + decode_dma_attrs(__entry->attrs)) +); + +DECLARE_EVENT_CLASS(dma_sync_single, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + __assign_str(device); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size) +); + +DEFINE_EVENT(dma_sync_single, dma_sync_single_for_cpu, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir)); + +DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir)); + +DECLARE_EVENT_CLASS(dma_sync_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, dma_addrs, nents) + __dynamic_array(unsigned int, lengths, nents) + __field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) { + ((u64 *)__get_dynamic_array(dma_addrs))[i] = + sg_dma_address(sg + i); + ((unsigned int *)__get_dynamic_array(lengths))[i] = + sg_dma_len(sg + i); + } + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addrs=%s sizes=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(dma_addrs), + __get_dynamic_array_len(dma_addrs) / + sizeof(u64), sizeof(u64)), + __print_array(__get_dynamic_array(lengths), + __get_dynamic_array_len(lengths) / + sizeof(unsigned int), sizeof(unsigned int))) +); + +DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_cpu, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir)); + +DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_device, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir)); + +#endif /* _TRACE_DMA_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 6cf1c97dad2ebc4de03105cc444b3dfaa83f3dc2 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 23 Apr 2024 11:41:07 +0800 Subject: virtio_balloon: introduce oom-kill invocations When the guest OS runs under critical memory pressure, the guest starts to kill processes. A guest monitor agent may scan 'oom_kill' from /proc/vmstat, and reports the OOM KILL event. However, the agent may be killed and we will loss this critical event(and the later events). 
For now we could also grep for magic words in the guest kernel log from the host side, but rather than relying on that unstable approach, have the virtio balloon device report OOM-kill invocations instead. Acked-by: David Hildenbrand Signed-off-by: zhenwei pi Message-Id: <20240423034109.1552866-3-pizhenwei@bytedance.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_balloon.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index ddaa45e723c4..b17bbe033697 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -71,7 +71,8 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */ #define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */ #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */ -#define VIRTIO_BALLOON_S_NR 10 +#define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */ +#define VIRTIO_BALLOON_S_NR 11 #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \ VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \ @@ -83,7 +84,8 @@ struct virtio_balloon_config { VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \ VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \ - VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \ + VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \ + VIRTIO_BALLOON_S_NAMES_prefix "oom-kills" \ } #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("") -- cgit v1.2.3 From c5b70a26aac39f09a23fd72f44cfbb3d4d5a14d5 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 23 Apr 2024 11:41:08 +0800 Subject: virtio_balloon: introduce memory allocation stall counter The memory allocation stall counter represents the performance/latency of memory allocation; expose this counter to the host side through the virtio balloon device via an out-of-band channel. Acked-by: David Hildenbrand Signed-off-by: zhenwei pi Message-Id: <20240423034109.1552866-4-pizhenwei@bytedance.com> Signed-off-by: Michael S.
Tsirkin --- include/uapi/linux/virtio_balloon.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index b17bbe033697..487b893a160e 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -72,7 +72,8 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */ #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */ #define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */ -#define VIRTIO_BALLOON_S_NR 11 +#define VIRTIO_BALLOON_S_ALLOC_STALL 11 /* Stall count of memory allocatoin */ +#define VIRTIO_BALLOON_S_NR 12 #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \ VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \ @@ -85,7 +86,8 @@ struct virtio_balloon_config { VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \ - VIRTIO_BALLOON_S_NAMES_prefix "oom-kills" \ + VIRTIO_BALLOON_S_NAMES_prefix "oom-kills", \ + VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls" \ } #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("") -- cgit v1.2.3 From 74c025c5d7e4ac7c7ad269c1ee64da4bdfe4770c Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 23 Apr 2024 11:41:09 +0800 Subject: virtio_balloon: introduce memory scan/reclaim info Expose memory scan/reclaim information to the host side via the virtio balloon device. Now we have a metric to analyze the memory performance:

y: counter increases
n: counter does not change
h: the rate of counter change is high
l: the rate of counter change is low

OOM: VIRTIO_BALLOON_S_OOM_KILL
STALL: VIRTIO_BALLOON_S_ALLOC_STALL
ASCAN: VIRTIO_BALLOON_S_SCAN_ASYNC
DSCAN: VIRTIO_BALLOON_S_SCAN_DIRECT
ARCLM: VIRTIO_BALLOON_S_RECLAIM_ASYNC
DRCLM: VIRTIO_BALLOON_S_RECLAIM_DIRECT

- OOM[y], STALL[*], ASCAN[*], DSCAN[*], ARCLM[*], DRCLM[*]: the guest runs under really critical memory pressure.

- OOM[n], STALL[h], ASCAN[*], DSCAN[l], ARCLM[*], DRCLM[l]: the memory allocation stalls due to a cgroup limit, not global memory pressure.

- OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[h]: the memory allocation stalls due to global memory pressure. Performance gets hurt a lot. A high DRCLM/DSCAN ratio shows quite effective memory reclaim.

- OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[l]: the memory allocation stalls due to global memory pressure, the DRCLM/DSCAN ratio is low, and the guest OS is thrashing heavily; serious cases lead to poor performance and difficult troubleshooting. For example, sshd may block on memory allocation when accepting new connections, and a user can't log in to the VM over ssh.

- OOM[n], STALL[n], ASCAN[h], DSCAN[n], ARCLM[l], DRCLM[n]: the low ARCLM/ASCAN ratio shows that the guest tries to reclaim more memory but can't. Once more memory is required in the future, it will struggle to reclaim it.

Acked-by: David Hildenbrand Signed-off-by: zhenwei pi Message-Id: <20240423034109.1552866-5-pizhenwei@bytedance.com> Signed-off-by: Michael S.
Tsirkin --- include/uapi/linux/virtio_balloon.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 487b893a160e..ee35a372805d 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -73,7 +73,11 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */ #define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */ #define VIRTIO_BALLOON_S_ALLOC_STALL 11 /* Stall count of memory allocatoin */ -#define VIRTIO_BALLOON_S_NR 12 +#define VIRTIO_BALLOON_S_ASYNC_SCAN 12 /* Amount of memory scanned asynchronously */ +#define VIRTIO_BALLOON_S_DIRECT_SCAN 13 /* Amount of memory scanned directly */ +#define VIRTIO_BALLOON_S_ASYNC_RECLAIM 14 /* Amount of memory reclaimed asynchronously */ +#define VIRTIO_BALLOON_S_DIRECT_RECLAIM 15 /* Amount of memory reclaimed directly */ +#define VIRTIO_BALLOON_S_NR 16 #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \ VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \ @@ -87,7 +91,11 @@ struct virtio_balloon_config { VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \ VIRTIO_BALLOON_S_NAMES_prefix "oom-kills", \ - VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls" \ + VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls", \ + VIRTIO_BALLOON_S_NAMES_prefix "async-scans", \ + VIRTIO_BALLOON_S_NAMES_prefix "direct-scans", \ + VIRTIO_BALLOON_S_NAMES_prefix "async-reclaims", \ + VIRTIO_BALLOON_S_NAMES_prefix "direct-reclaims" \ } #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("") -- cgit v1.2.3 From 2f87e9cf0c9e21ab9be1fb2ba8520a1525359497 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 31 Jul 2024 11:16:01 +0800 Subject: vdpa: support set mac address from vdpa tool Add new UAPI to support the mac address from vdpa tool Function vdpa_nl_cmd_dev_attr_set_doit() will get the new MAC address from the vdpa tool and then set it to the device. The usage is: vdpa dev set name vdpa_name mac **:**:**:**:**:** Here is example: root@L1# vdpa -jp dev config show vdpa0 { "config": { "vdpa0": { "mac": "82:4d:e9:5d:d7:e6", "link ": "up", "link_announce ": false, "mtu": 1500 } } } root@L1# vdpa dev set name vdpa0 mac 00:11:22:33:44:55 root@L1# vdpa -jp dev config show vdpa0 { "config": { "vdpa0": { "mac": "00:11:22:33:44:55", "link ": "up", "link_announce ": false, "mtu": 1500 } } } Signed-off-by: Cindy Lu Message-Id: <20240731031653.1047692-2-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/linux/vdpa.h | 9 +++++++++ include/uapi/linux/vdpa.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 7977ca03ac7a..2e7a30fe6b92 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -582,11 +582,20 @@ void vdpa_set_status(struct vdpa_device *vdev, u8 status); * @dev: vdpa device to remove * Driver need to remove the specified device by calling * _vdpa_unregister_device(). + * @dev_set_attr: change a vdpa device's attr after it was create + * @mdev: parent device to use for device + * @dev: vdpa device structure + * @config:Attributes to be set for the device. + * The driver needs to check the mask of the structure and then set + * the related information to the vdpa device. The driver must return 0 + * if set successfully. 
*/ struct vdpa_mgmtdev_ops { int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name, const struct vdpa_dev_set_config *config); void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); + int (*dev_set_attr)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev, + const struct vdpa_dev_set_config *config); }; /** diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 842bf1201ac4..71edf2c70cc3 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -19,6 +19,7 @@ enum vdpa_command { VDPA_CMD_DEV_GET, /* can dump */ VDPA_CMD_DEV_CONFIG_GET, /* can dump */ VDPA_CMD_DEV_VSTATS_GET, + VDPA_CMD_DEV_ATTR_SET, }; enum vdpa_attr { -- cgit v1.2.3 From 11596dc3dfae572cc267dda2dc4dab9ae34668f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Sep 2024 07:39:05 +0300 Subject: iomap: pass flags to iomap_file_buffered_write_punch_delalloc To fix short write error handling, We'll need to figure out what operation iomap_file_buffered_write_punch_delalloc is called for. Pass the flags argument on to it, and reorder the argument list to match that of ->iomap_end so that the compiler only has to add the new punch argument to the end of it instead of reshuffling the registers. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240910043949.3481298-4-hch@lst.de Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f792b37f7627..52626906ff35 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -258,10 +258,6 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, void *private); -int iomap_file_buffered_write_punch_delalloc(struct inode *inode, - struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, - int (*punch)(struct inode *inode, loff_t pos, loff_t length)); - int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); @@ -277,6 +273,12 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops); vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); + +typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length); +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, + loff_t length, ssize_t written, unsigned flag, + struct iomap *iomap, iomap_punch_t punch); + int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, const struct iomap_ops *ops); loff_t iomap_seek_hole(struct inode *inode, loff_t offset, -- cgit v1.2.3 From 492f53758fad4fde3e9a98696780f8b53f87cdae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Sep 2024 07:39:06 +0300 Subject: iomap: pass the iomap to the punch callback XFS will need to look at the flags in the iomap structure, so pass it down all the way to the callback. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240910043949.3481298-5-hch@lst.de Reviewed-by: Darrick J. 
Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 52626906ff35..ec2eab94f00a 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -274,7 +274,8 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); -typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length); +typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, + struct iomap *iomap); int iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned flag, struct iomap *iomap, iomap_punch_t punch); -- cgit v1.2.3 From 4bceb9ba05aca23652567fb5f899cc7fcb12c8d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Sep 2024 07:39:07 +0300 Subject: iomap: remove the iomap_file_buffered_write_punch_delalloc return value iomap_file_buffered_write_punch_delalloc can only return errors if either the ->punch callback returned an error, or if someone changed the API of mapping_seek_hole_data to return a negative error code that is not -ENXIO. As the only instance of ->punch never returns an error, and such an error would be fatal anyway, remove the error propagation entirely and don't return an error code from iomap_file_buffered_write_punch_delalloc. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240910043949.3481298-6-hch@lst.de Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index ec2eab94f00a..4ad12a3c8bae 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -274,9 +274,9 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); -typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, +typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, struct iomap *iomap); -int iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, +void iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned flag, struct iomap *iomap, iomap_punch_t punch); -- cgit v1.2.3 From 9028cdeb38e1f37d63cb3154799dd259b67e879e Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 5 Sep 2024 10:34:22 -0700 Subject: memcg: add charging of already allocated slab objects At the moment, slab objects are charged to the memcg at allocation time. However, there are cases where slab objects are allocated at a time when the right target memcg to charge them to is not known. One such case is network sockets for incoming connections, which are allocated in softirq context. A couple hundred thousand connections are very normal on a large, loaded server, and almost all of the sockets underlying those connections are allocated in softirq context and thus not charged to any memcg. However, later at accept() time we do know the right target memcg. Let's add a new API to charge already allocated objects so we can have better accounting of the memory usage. To measure the performance impact of this change, tcp_crr is used from the neper [1] performance suite.
Basically it is a network ping pong test with new connection for each ping pong. The server and the client are run inside 3 level of cgroup hierarchy using the following commands: Server: $ tcp_crr -6 Client: $ tcp_crr -6 -c -H ${server_ip} If the client and server run on different machines with 50 GBPS NIC, there is no visible impact of the change. For the same machine experiment with v6.11-rc5 as base. base (throughput) with-patch tcp_crr 14545 (+- 80) 14463 (+- 56) It seems like the performance impact is within the noise. Link: https://github.com/google/neper [1] Signed-off-by: Shakeel Butt Reviewed-by: Roman Gushchin Reviewed-by: Yosry Ahmed Acked-by: Paolo Abeni # net Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index eb2bf4629157..3be2a5ed4936 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -547,6 +547,35 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) __assume_slab_alignment __malloc; #define kmem_cache_alloc_lru(...) alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__)) +/** + * kmem_cache_charge - memcg charge an already allocated slab memory + * @objp: address of the slab object to memcg charge + * @gfpflags: describe the allocation context + * + * kmem_cache_charge allows charging a slab object to the current memcg, + * primarily in cases where charging at allocation time might not be possible + * because the target memcg is not known (i.e. softirq context) + * + * The objp should be pointer returned by the slab allocator functions like + * kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge + * behavior can be controlled through gfpflags parameter, which affects how the + * necessary internal metadata can be allocated. Including __GFP_NOFAIL denotes + * that overcharging is requested instead of failure, but is not applied for the + * internal metadata allocation. + * + * There are several cases where it will return true even if the charging was + * not done: + * More specifically: + * + * 1. For !CONFIG_MEMCG or cgroup_disable=memory systems. + * 2. Already charged slab objects. + * 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc() + * without __GFP_ACCOUNT + * 4. Allocating internal metadata has failed + * + * Return: true if charge was successful otherwise false. + */ +bool kmem_cache_charge(void *objp, gfp_t gfpflags); void kmem_cache_free(struct kmem_cache *s, void *objp); kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, -- cgit v1.2.3 From 879fb3c274c12cb0892718e1e54f85fc406e3c7b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:45 +0200 Subject: slab: add struct kmem_cache_args Currently we have multiple kmem_cache_create*() variants that take up to seven separate parameters with one of the functions having to grow an eigth parameter in the future to handle both usercopy and a custom freelist pointer. Add a struct kmem_cache_args structure and move less common parameters into it. Core parameters such as name, object size, and flags continue to be passed separately. Add a new function __kmem_cache_create_args() that takes a struct kmem_cache_args pointer and port do_kmem_cache_create_usercopy() over to it. In follow-up patches we will port the other kmem_cache_create*() variants over to it as well. 
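For a sense of the intended calling convention, a hypothetical caller (struct foo, foo_ctor and foo_cache_init() are made-up names, not taken from the patch) might look like:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    struct foo {
            int id;
            char name[16];
    };

    /* Constructor run when the cache allocates new pages. */
    static void foo_ctor(void *obj)
    {
            memset(obj, 0, sizeof(struct foo));
    }

    static struct kmem_cache *foo_cachep;

    static int __init foo_cache_init(void)
    {
            struct kmem_cache_args args = {
                    .align = __alignof__(struct foo),
                    .ctor  = foo_ctor,
            };

            foo_cachep = __kmem_cache_create_args("foo_cache", sizeof(struct foo),
                                                  &args, SLAB_HWCACHE_ALIGN);
            return foo_cachep ? 0 : -ENOMEM;
    }

Because the less common parameters live in the struct, adding a new member later does not require touching callers that don't use it.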
Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 5b2da2cf31a8..2b8eeca7fd2c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -240,6 +240,28 @@ struct mem_cgroup; */ bool slab_is_available(void); +/** + * struct kmem_cache_args - Less common arguments for kmem_cache_create() + * @align: The required alignment for the objects. + * @useroffset: Usercopy region offset + * @usersize: Usercopy region size + * @freeptr_offset: Custom offset for the free pointer in RCU caches + * @use_freeptr_offset: Whether a @freeptr_offset is used + * @ctor: A constructor for the objects. + */ +struct kmem_cache_args { + unsigned int align; + unsigned int useroffset; + unsigned int usersize; + unsigned int freeptr_offset; + bool use_freeptr_offset; + void (*ctor)(void *); +}; + +struct kmem_cache *__kmem_cache_create_args(const char *name, + unsigned int object_size, + struct kmem_cache_args *args, + slab_flags_t flags); struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *)); -- cgit v1.2.3 From 052d67b46bcd91b6785e8e6e047241814f6142a3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:53 +0200 Subject: slab: port KMEM_CACHE() to struct kmem_cache_args Make KMEM_CACHE() use struct kmem_cache_args. Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 2b8eeca7fd2c..1f38d94387cc 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -284,9 +284,11 @@ int kmem_cache_shrink(struct kmem_cache *s); * f.e. add ____cacheline_aligned_in_smp to the struct declaration * then the objects will be properly aligned in SMP configurations. */ -#define KMEM_CACHE(__struct, __flags) \ - kmem_cache_create(#__struct, sizeof(struct __struct), \ - __alignof__(struct __struct), (__flags), NULL) +#define KMEM_CACHE(__struct, __flags) \ + __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ + &(struct kmem_cache_args) { \ + .align = __alignof__(struct __struct), \ + }, (__flags)) /* * To whitelist a single field for copying to/from usercopy, use this -- cgit v1.2.3 From 199cd13a745eb44fb4828bca373155293cdcfa5c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:54 +0200 Subject: slab: port KMEM_CACHE_USERCOPY() to struct kmem_cache_args Make KMEM_CACHE_USERCOPY() use struct kmem_cache_args. 
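For reference, users of the macro are unchanged by this conversion; a hypothetical cache that whitelists a single field for usercopy still reads:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct sample {
            u32 flags;
            char ubuf[64];  /* the only region copied to/from user space */
    };

    static struct kmem_cache *sample_cachep;

    static int __init sample_cache_init(void)
    {
            /* Expands to __kmem_cache_create_args() with useroffset/usersize set. */
            sample_cachep = KMEM_CACHE_USERCOPY(sample, SLAB_ACCOUNT, ubuf);
            return sample_cachep ? 0 : -ENOMEM;
    }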
Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 1f38d94387cc..7e15a3a3edb1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -294,12 +294,13 @@ int kmem_cache_shrink(struct kmem_cache *s); * To whitelist a single field for copying to/from usercopy, use this * macro instead for KMEM_CACHE() above. */ -#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ - kmem_cache_create_usercopy(#__struct, \ - sizeof(struct __struct), \ - __alignof__(struct __struct), (__flags), \ - offsetof(struct __struct, __field), \ - sizeof_field(struct __struct, __field), NULL) +#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ + __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ + &(struct kmem_cache_args) { \ + .align = __alignof__(struct __struct), \ + .useroffset = offsetof(struct __struct, __field), \ + .usersize = sizeof_field(struct __struct, __field), \ + }, (__flags)) /* * Common kmalloc functions provided by all allocators -- cgit v1.2.3 From b2e7456b5c25c41eda7a8a15f7ccaa4e7579949f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:55 +0200 Subject: slab: create kmem_cache_create() compatibility layer Use _Generic() to create a compatibility layer that type switches on the third argument to either call __kmem_cache_create() or __kmem_cache_create_args(). If NULL is passed for the struct kmem_cache_args argument use default args making porting for callers that don't care about additional arguments easy. Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7e15a3a3edb1..15eb0a0defd6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -262,9 +262,10 @@ struct kmem_cache *__kmem_cache_create_args(const char *name, unsigned int object_size, struct kmem_cache_args *args, slab_flags_t flags); -struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, - unsigned int align, slab_flags_t flags, - void (*ctor)(void *)); + +struct kmem_cache *__kmem_cache_create(const char *name, unsigned int size, + unsigned int align, slab_flags_t flags, + void (*ctor)(void *)); struct kmem_cache *kmem_cache_create_usercopy(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, @@ -273,6 +274,28 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, struct kmem_cache *kmem_cache_create_rcu(const char *name, unsigned int size, unsigned int freeptr_offset, slab_flags_t flags); + +/* If NULL is passed for @args, use this variant with default arguments. */ +static inline struct kmem_cache * +__kmem_cache_default_args(const char *name, unsigned int size, + struct kmem_cache_args *args, + slab_flags_t flags) +{ + struct kmem_cache_args kmem_default_args = {}; + + /* Make sure we don't get passed garbage. 
*/ + if (WARN_ON_ONCE(args)) + return ERR_PTR(-EINVAL); + + return __kmem_cache_create_args(name, size, &kmem_default_args, flags); +} + +#define kmem_cache_create(__name, __object_size, __args, ...) \ + _Generic((__args), \ + struct kmem_cache_args *: __kmem_cache_create_args, \ + void *: __kmem_cache_default_args, \ + default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) + void kmem_cache_destroy(struct kmem_cache *s); int kmem_cache_shrink(struct kmem_cache *s); -- cgit v1.2.3 From 3d453e60f1a9c377d670d5fe306d3b9eed477bc0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:57 +0200 Subject: slab: remove kmem_cache_create_rcu() Now that we have ported all users of kmem_cache_create_rcu() to struct kmem_cache_args the function is unused and can be removed. Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 15eb0a0defd6..cb82a6414a28 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -271,9 +271,6 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)); -struct kmem_cache *kmem_cache_create_rcu(const char *name, unsigned int size, - unsigned int freeptr_offset, - slab_flags_t flags); /* If NULL is passed for @args, use this variant with default arguments. */ static inline struct kmem_cache * -- cgit v1.2.3 From 0c9050b09cfb1ddb3fe4b2d401dca1fb674c825a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:58 +0200 Subject: slab: make kmem_cache_create_usercopy() static inline Make kmem_cache_create_usercopy() a static inline function. Signed-off-by: Christian Brauner Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 49 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index cb82a6414a28..634717c9f73b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -266,11 +266,50 @@ struct kmem_cache *__kmem_cache_create_args(const char *name, struct kmem_cache *__kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *)); -struct kmem_cache *kmem_cache_create_usercopy(const char *name, - unsigned int size, unsigned int align, - slab_flags_t flags, - unsigned int useroffset, unsigned int usersize, - void (*ctor)(void *)); + +/** + * kmem_cache_create_usercopy - Create a cache with a region suitable + * for copying to userspace + * @name: A string which is used in /proc/slabinfo to identify this cache. + * @size: The size of objects to be created in this cache. + * @align: The required alignment for the objects. + * @flags: SLAB flags + * @useroffset: Usercopy region offset + * @usersize: Usercopy region size + * @ctor: A constructor for the objects. + * + * Cannot be called within a interrupt, but can be interrupted. + * The @ctor is run when new pages are allocated by the cache. + * + * The flags are + * + * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) + * to catch references to uninitialised memory. 
+ * + * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check + * for buffer overruns. + * + * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware + * cacheline. This can be beneficial if you're counting cycles as closely + * as davem. + * + * Return: a pointer to the cache on success, NULL on failure. + */ +static inline struct kmem_cache * +kmem_cache_create_usercopy(const char *name, unsigned int size, + unsigned int align, slab_flags_t flags, + unsigned int useroffset, unsigned int usersize, + void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + .useroffset = useroffset, + .usersize = usersize, + }; + + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} /* If NULL is passed for @args, use this variant with default arguments. */ static inline struct kmem_cache * -- cgit v1.2.3 From 781aee755638dbb6dbfe022bdd2f732621ec9534 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:59 +0200 Subject: slab: make __kmem_cache_create() static inline Make __kmem_cache_create() a static inline function. Signed-off-by: Christian Brauner Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 634717c9f73b..331412a9f4f2 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -262,10 +262,17 @@ struct kmem_cache *__kmem_cache_create_args(const char *name, unsigned int object_size, struct kmem_cache_args *args, slab_flags_t flags); +static inline struct kmem_cache * +__kmem_cache_create(const char *name, unsigned int size, unsigned int align, + slab_flags_t flags, void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + }; -struct kmem_cache *__kmem_cache_create(const char *name, unsigned int size, - unsigned int align, slab_flags_t flags, - void (*ctor)(void *)); + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} /** * kmem_cache_create_usercopy - Create a cache with a region suitable -- cgit v1.2.3 From 4ba4f1afb6a9fed8ef896c2363076e36572f71da Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 2 Aug 2024 08:16:37 -0700 Subject: perf: Generic hotplug support for a PMU with a scope The perf subsystem assumes that the counters of a PMU are per-CPU. So the user space tool reads a counter from each CPU in the system wide mode. However, many PMUs don't have a per-CPU counter. The counter is effective for a scope, e.g., a die or a socket. To address this, a cpumask is exposed by the kernel driver to restrict to one CPU to stand for a specific scope. In case the given CPU is removed, the hotplug support has to be implemented for each such driver. The codes to support the cpumask and hotplug are very similar. - Expose a cpumask into sysfs - Pickup another CPU in the same scope if the given CPU is removed. - Invoke the perf_pmu_migrate_context() to migrate to a new CPU. - In event init, always set the CPU in the cpumask to event->cpu Similar duplicated codes are implemented for each such PMU driver. It would be good to introduce a generic infrastructure to avoid such duplication. 5 popular scopes are implemented here, core, die, cluster, pkg, and the system-wide. The scope can be set when a PMU is registered. If so, a "cpumask" is automatically exposed for the PMU. 
The "cpumask" is from the perf_online__mask, which is to track the active CPU for each scope. They are set when the first CPU of the scope is online via the generic perf hotplug support. When a corresponding CPU is removed, the perf_online__mask is updated accordingly and the PMU will be moved to a new CPU from the same scope if possible. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240802151643.1691631-2-kan.liang@linux.intel.com --- include/linux/perf_event.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 701549967c18..a3cbcd727366 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -295,6 +295,19 @@ struct perf_event_pmu_context; #define PERF_PMU_CAP_AUX_OUTPUT 0x0080 #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 +/** + * pmu::scope + */ +enum perf_pmu_scope { + PERF_PMU_SCOPE_NONE = 0, + PERF_PMU_SCOPE_CORE, + PERF_PMU_SCOPE_DIE, + PERF_PMU_SCOPE_CLUSTER, + PERF_PMU_SCOPE_PKG, + PERF_PMU_SCOPE_SYS_WIDE, + PERF_PMU_MAX_SCOPE, +}; + struct perf_output_handle; #define PMU_NULL_DEV ((void *)(~0UL)) @@ -318,6 +331,11 @@ struct pmu { */ int capabilities; + /* + * PMU scope + */ + unsigned int scope; + int __percpu *pmu_disable_count; struct perf_cpu_pmu_context __percpu *cpu_pmu_context; atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ -- cgit v1.2.3 From a48a36b316ae5d3ab83f9b545dba15998e96d59c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 2 Aug 2024 08:16:38 -0700 Subject: perf: Add PERF_EV_CAP_READ_SCOPE Usually, an event can be read from any CPU of the scope. It doesn't need to be read from the advertised CPU. Add a new event cap, PERF_EV_CAP_READ_SCOPE. An event of a PMU with scope can be read from any active CPU in the scope. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240802151643.1691631-3-kan.liang@linux.intel.com --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a3cbcd727366..794f66057878 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -636,10 +636,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and * cannot be a group leader. If an event with this flag is detached from the * group it is scheduled out and moved into an unrecoverable ERROR state. + * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the + * PMU scope where it is active. */ #define PERF_EV_CAP_SOFTWARE BIT(0) #define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) #define PERF_EV_CAP_SIBLING BIT(2) +#define PERF_EV_CAP_READ_SCOPE BIT(3) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) -- cgit v1.2.3 From 08155c7f2a2cc6ff99886ea5743da274be24ebe4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 2 Aug 2024 08:16:39 -0700 Subject: perf/x86/intel/cstate: Clean up cpumask and hotplug There are three cstate PMUs with different scopes, core, die and module. The scopes are supported by the generic perf_event subsystem now. Set the scope for each PMU and remove all the cpumask and hotplug codes. 
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240802151643.1691631-4-kan.liang@linux.intel.com --- include/linux/cpuhotplug.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 9316c39260e0..2101ae2ecfca 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -152,7 +152,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, CPUHP_AP_PERF_X86_AMD_IBS_STARTING, - CPUHP_AP_PERF_X86_CSTATE_STARTING, CPUHP_AP_PERF_XTENSA_STARTING, CPUHP_AP_ARM_VFP_STARTING, CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, @@ -209,7 +208,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, CPUHP_AP_PERF_X86_RAPL_ONLINE, - CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, -- cgit v1.2.3 From 740c1c84bfa3d8c63bd3b01fb570e7452f51fbd8 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Sep 2024 10:26:17 +0800 Subject: spi: remove spi_controller_is_slave() and spi_slave_abort() spi_controller_is_slave() and spi_slave_abort() have both been replaced, so they can be removed. No functional change. Signed-off-by: Yang Yingliang Link: https://patch.msgid.link/20240910022618.1397-8-yangyingliang@huaweicloud.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d47d5f14ff99..4b95663163e0 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -498,7 +498,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * controller has native support for memory like operations. * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). - * @slave_abort: abort the ongoing transfer request on an SPI slave controller * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS * number.
Any individual value may be NULL for CS lines that @@ -725,10 +724,7 @@ struct spi_controller { struct spi_message *message); int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); - union { - int (*slave_abort)(struct spi_controller *ctlr); - int (*target_abort)(struct spi_controller *ctlr); - }; + int (*target_abort)(struct spi_controller *ctlr); /* * These hooks are for drivers that use a generic implementation @@ -802,11 +798,6 @@ static inline void spi_controller_put(struct spi_controller *ctlr) put_device(&ctlr->dev); } -static inline bool spi_controller_is_slave(struct spi_controller *ctlr) -{ - return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave; -} - static inline bool spi_controller_is_target(struct spi_controller *ctlr) { return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target; @@ -1296,7 +1287,6 @@ extern int devm_spi_optimize_message(struct device *dev, struct spi_device *spi, extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); -extern int spi_slave_abort(struct spi_device *spi); extern int spi_target_abort(struct spi_device *spi); static inline size_t -- cgit v1.2.3 From d6de45e3c6f3713d3825d3e2860c11d24e0f941f Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Tue, 10 Sep 2024 00:35:03 +0200 Subject: platform/x86: asus-wmi: Disable OOBE experience on Zenbook S 16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OOBE experience fades the keyboard backlight in & out continuously, and make the backlight uncontrollable using its device. Workaround taken from https://wiki.archlinux.org/index.php?title=ASUS_Zenbook_UM5606&diff=next&oldid=815547 Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Luke D. Jones Link: https://lore.kernel.org/r/20240909223503.1445779-1-bas@basnieuwenhuizen.nl Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/asus-wmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 0aeeae1c1943..ae9bf7479e7b 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -62,6 +62,7 @@ #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 +#define ASUS_WMI_DEVID_OOBE 0x0005002F /* This can only be used to disable the screen, not re-enable */ #define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031 /* Writing a brightness re-enables the screen if disabled */ -- cgit v1.2.3 From 9e97e8b277a2235bbb562a4feb6f1216fb52d1b1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Jul 2024 17:12:15 -0400 Subject: btrfs: update the writepage tracepoint to take a folio Willy is wanting to get rid of page->index, convert the writepage tracepoint to take a folio so we can do folio->index instead of page->index. 
Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0a523023bdcc..0eddbb8b6728 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -674,10 +674,10 @@ TRACE_EVENT(btrfs_finish_ordered_extent, DECLARE_EVENT_CLASS(btrfs__writepage, - TP_PROTO(const struct page *page, const struct inode *inode, + TP_PROTO(const struct folio *folio, const struct inode *inode, const struct writeback_control *wbc), - TP_ARGS(page, inode, wbc), + TP_ARGS(folio, inode, wbc), TP_STRUCT__entry_btrfs( __field( u64, ino ) @@ -695,7 +695,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage, TP_fast_assign_btrfs(btrfs_sb(inode->i_sb), __entry->ino = btrfs_ino(BTRFS_I(inode)); - __entry->index = page->index; + __entry->index = folio->index; __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->range_start = wbc->range_start; @@ -723,10 +723,10 @@ DECLARE_EVENT_CLASS(btrfs__writepage, DEFINE_EVENT(btrfs__writepage, __extent_writepage, - TP_PROTO(const struct page *page, const struct inode *inode, + TP_PROTO(const struct folio *folio, const struct inode *inode, const struct writeback_control *wbc), - TP_ARGS(page, inode, wbc) + TP_ARGS(folio, inode, wbc) ); TRACE_EVENT(btrfs_writepage_end_io_hook, -- cgit v1.2.3 From 06de42c5a98a28060b314589241cabcacc3c4ff8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 27 Aug 2024 03:30:16 +0200 Subject: btrfs: rename __extent_writepage() and drop double underscores The function does not follow the pattern where the underscores would be justified, so rename it. Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0eddbb8b6728..e4add61e00f1 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -721,7 +721,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage, __entry->writeback_index) ); -DEFINE_EVENT(btrfs__writepage, __extent_writepage, +DEFINE_EVENT(btrfs__writepage, extent_writepage, TP_PROTO(const struct folio *folio, const struct inode *inode, const struct writeback_control *wbc), -- cgit v1.2.3 From ca283ea9920ac20ae23ed398b693db3121045019 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 26 Jun 2024 23:39:11 +0200 Subject: btrfs: constify more pointer parameters Continue adding const to parameters. This is for clarity and minor addition to safety. There are some minor effects, in the assembly code and .ko measured on release config. 
Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index e4add61e00f1..bf60ad50011e 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1825,7 +1825,7 @@ TRACE_EVENT(qgroup_update_counters, TRACE_EVENT(qgroup_update_reserve, - TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup, + TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup, s64 diff, int type), TP_ARGS(fs_info, qgroup, diff, type), @@ -1851,7 +1851,7 @@ TRACE_EVENT(qgroup_update_reserve, TRACE_EVENT(qgroup_meta_reserve, - TP_PROTO(struct btrfs_root *root, s64 diff, int type), + TP_PROTO(const struct btrfs_root *root, s64 diff, int type), TP_ARGS(root, diff, type), @@ -1874,7 +1874,7 @@ TRACE_EVENT(qgroup_meta_reserve, TRACE_EVENT(qgroup_meta_convert, - TP_PROTO(struct btrfs_root *root, s64 diff), + TP_PROTO(const struct btrfs_root *root, s64 diff), TP_ARGS(root, diff), -- cgit v1.2.3 From 29aeb4e8918e6aeeaf0bb7a03b000a73596d54a3 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Fri, 26 Jul 2024 16:13:24 +0530 Subject: Bluetooth: Add a helper function to extract iso header Add a helper function hci_iso_hdr() to extract iso header from skb. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index d1d073089f38..bab1e3d7452a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2901,6 +2901,11 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb) return (struct hci_sco_hdr *) skb->data; } +static inline struct hci_iso_hdr *hci_iso_hdr(const struct sk_buff *skb) +{ + return (struct hci_iso_hdr *)skb->data; +} + /* Command opcode pack/unpack */ #define hci_opcode_pack(ogf, ocf) ((__u16) ((ocf & 0x03ff)|(ogf << 10))) #define hci_opcode_ogf(op) (op >> 10) -- cgit v1.2.3 From 2fcb7936cef3a045ced9d5b8cdb644bced9f99b5 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 19 Aug 2024 21:52:11 +0800 Subject: Bluetooth: L2CAP: Remove unused declarations Commit e7b02296fb40 ("Bluetooth: Remove BT_HS") removed the implementations but leave declarations. 
Signed-off-by: Yue Haibing Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 5cfdc813491a..313d0b972e06 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -968,10 +968,6 @@ void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data); void l2cap_chan_del(struct l2cap_chan *chan, int err); void l2cap_send_conn_req(struct l2cap_chan *chan); -void l2cap_move_start(struct l2cap_chan *chan); -void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan, - u8 status); -void __l2cap_physical_cfm(struct l2cap_chan *chan, int result); struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn); void l2cap_conn_put(struct l2cap_conn *conn); -- cgit v1.2.3 From d47da6bd4cfa982fe903f33423b9e2ec541e9496 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 30 Aug 2024 17:29:27 -0400 Subject: Bluetooth: hci_core: Fix sending MGMT_EV_CONNECT_FAILED If HCI_CONN_MGMT_CONNECTED has been set then the event shall be HCI_CONN_MGMT_DISCONNECTED. Fixes: b644ba336997 ("Bluetooth: Update device_connected and device_found events to latest API") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1a32e602630e..88265d37aa72 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2257,8 +2257,8 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, bool mgmt_connected); void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); -void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, - u8 addr_type, u8 status); +void mgmt_connect_failed(struct hci_dev *hdev, struct hci_conn *conn, + u8 status); void mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure); void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status); -- cgit v1.2.3 From b2d4da31a1f40b05a61076efd4c79b88439003b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 2 Aug 2024 09:56:28 -0400 Subject: drm: new helper: drm_gem_prime_handle_to_dmabuf() Once something had been put into descriptor table, the only thing you can do with it is returning descriptor to userland - you can't withdraw it on subsequent failure exit, etc. You certainly can't count upon it staying in the same slot of descriptor table - another thread could've played with close(2)/dup2(2)/whatnot. drm_gem_prime_handle_to_fd() creates a dmabuf, allocates a descriptor and attaches dmabuf's file to it (the last two steps are done in dma_buf_fd()). That's nice when all you are going to do is passing a descriptor to userland. If you just need to work with the resulting object or have something else to be done that might fail, drm_gem_prime_handle_to_fd() is racy. The problem is analogous to one with anon_inode_getfd(), and solution is similar to what anon_inode_getfile() provides. Add drm_gem_prime_handle_to_dmabuf() - the "set dmabuf up" parts of drm_gem_prime_handle_to_fd() without the descriptor-related ones. Instead of inserting into descriptor table and returning the file descriptor it just returns the struct file. drm_gem_prime_handle_to_fd() becomes a wrapper for it. 
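A sketch of the calling pattern this enables (error paths trimmed; example_export() and its surrounding plumbing are assumed here, not taken from the patch): obtain the dma-buf first, do everything that can still fail, and only then publish a descriptor.

    #include <linux/dma-buf.h>
    #include <linux/err.h>
    #include <drm/drm_prime.h>

    static int example_export(struct drm_device *dev, struct drm_file *file_priv,
                              u32 handle, u32 flags)
    {
            struct dma_buf *dmabuf;
            int fd;

            dmabuf = drm_gem_prime_handle_to_dmabuf(dev, file_priv, handle, flags);
            if (IS_ERR(dmabuf))
                    return PTR_ERR(dmabuf);

            /*
             * Work that may still fail goes here; only a dma_buf reference is
             * held and nothing has been exposed to user space yet.
             */

            fd = dma_buf_fd(dmabuf, flags);
            if (fd < 0) {
                    /* No descriptor was installed, so the reference is still ours. */
                    dma_buf_put(dmabuf);
                    return fd;
            }
            return fd;
    }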
Other users will be introduced in the next commit. Acked-by: Thomas Zimmermann Signed-off-by: Al Viro Signed-off-by: Alex Deucher --- include/drm/drm_prime.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index 2a1d01e5b56b..fa085c44d4ca 100644 --- a/include/drm/drm_prime.h +++ b/include/drm/drm_prime.h @@ -69,6 +69,9 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf); int drm_gem_prime_fd_to_handle(struct drm_device *dev, struct drm_file *file_priv, int prime_fd, uint32_t *handle); +struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags); int drm_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags, int *prime_fd); -- cgit v1.2.3 From 070a5e6295e88a2b75cbfef04ff8b4ec05775e6d Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Fri, 6 Sep 2024 22:30:06 +0800 Subject: net: stmmac: move stmmac_fpe_cfg to stmmac_priv data By moving the fpe_cfg field to the stmmac_priv data, stmmac_fpe_cfg becomes platform-data eventually, instead of a run-time config. Suggested-by: Serge Semin Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Vladimir Oltean Reviewed-by: Serge Semin Link: https://patch.msgid.link/d9b3d7ecb308c5e39778a4c8ae9df288a2754379.1725631883.git.0x1207@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 338991c08f00..d79ff252cfdc 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -138,33 +138,6 @@ struct stmmac_txq_cfg { int tbs_en; }; -/* FPE link state */ -enum stmmac_fpe_state { - FPE_STATE_OFF = 0, - FPE_STATE_CAPABLE = 1, - FPE_STATE_ENTERING_ON = 2, - FPE_STATE_ON = 3, -}; - -/* FPE link-partner hand-shaking mPacket type */ -enum stmmac_mpacket_type { - MPACKET_VERIFY = 0, - MPACKET_RESPONSE = 1, -}; - -enum stmmac_fpe_task_state_t { - __FPE_REMOVING, - __FPE_TASK_SCHED, -}; - -struct stmmac_fpe_cfg { - bool enable; /* FPE enable */ - bool hs_enable; /* FPE handshake enable */ - enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ - enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ - u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ -}; - struct stmmac_safety_feature_cfg { u32 tsoee; u32 mrxpee; @@ -232,7 +205,6 @@ struct plat_stmmacenet_data { struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; - struct stmmac_fpe_cfg *fpe_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From be8e9eb3750639aa5cffb3f764ca080caed41bd0 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 9 Sep 2024 09:56:11 +0800 Subject: net-timestamp: introduce SOF_TIMESTAMPING_OPT_RX_FILTER flag introduce a new flag SOF_TIMESTAMPING_OPT_RX_FILTER in the receive path. User can set it with SOF_TIMESTAMPING_SOFTWARE to filter out rx software timestamp report, especially after a process turns on netstamp_needed_key which can time stamp every incoming skb. Previously, we found out if an application starts first which turns on netstamp_needed_key, then another one only passing SOF_TIMESTAMPING_SOFTWARE could also get rx timestamp. Now we handle this case by introducing this new flag without breaking users. 
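For illustration only (not part of this patch), a user-space consumer might request software TX timestamps while filtering out RX software timestamp reports roughly like this, assuming the installed UAPI headers already define the new flag:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>	/* SOF_TIMESTAMPING_* flags */

/* Illustrative sketch: SW TX timestamps on, RX SW timestamp reports filtered. */
static int example_enable_tx_only_sw_timestamps(int fd)
{
	unsigned int val = SOF_TIMESTAMPING_SOFTWARE |
			   SOF_TIMESTAMPING_TX_SOFTWARE |
			   SOF_TIMESTAMPING_OPT_RX_FILTER;

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0) {
		perror("setsockopt(SO_TIMESTAMPING)");
		return -1;
	}
	return 0;
}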
Quoting Willem to explain why we need the flag: "why a process would want to request software timestamp reporting, but not receive software timestamp generation. The only use I see is when the application does request SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_SOFTWARE." Similarly, this new flag could also be used for hardware case where we can set it with SOF_TIMESTAMPING_RAW_HARDWARE, then we won't receive hardware receive timestamp. Another thing about errqueue in this patch I have a few words to say: In this case, we need to handle the egress path carefully, or else reporting the tx timestamp will fail. Egress path and ingress path will finally call sock_recv_timestamp(). We have to distinguish them. Errqueue is a good indicator to reflect the flow direction. Suggested-by: Willem de Bruijn Signed-off-by: Jason Xing Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240909015612.3856-2-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_tstamp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index a2c66b3d7f0f..858339d1c1c4 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -32,8 +32,9 @@ enum { SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), SOF_TIMESTAMPING_BIND_PHC = (1 << 15), SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), + SOF_TIMESTAMPING_OPT_RX_FILTER = (1 << 17), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_RX_FILTER, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; -- cgit v1.2.3 From d591f6804e7e1310881c9224d72247a2b65039af Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 27 Aug 2024 18:48:46 -0500 Subject: PCI: Wait for device readiness with Configuration RRS After a device reset, delays are required before the device can successfully complete config accesses. PCIe r6.0, sec 6.6, specifies some delays required before software can perform config accesses. Devices that require more time after those delays may respond to config accesses with Configuration Request Retry Status (RRS) completions. Callers of pci_dev_wait() are responsible for delays until the device can respond to config accesses. pci_dev_wait() waits any additional time until the device can successfully complete config accesses. Reading config space of devices that are not present or not ready typically returns ~0 (PCI_ERROR_RESPONSE). Previously we polled the Command register until we got a value other than ~0. This is sometimes a problem because Root Complex handling of RRS completions may include several retries and implementation-specific behavior that is invisible to software (see sec 2.3.2), so the exponential backoff in pci_dev_wait() may not work as intended. Linux enables Configuration RRS Software Visibility on all Root Ports that support it. If it is enabled, read the Vendor ID instead of the Command register. RRS completions cause immediate return of the 0x0001 reserved Vendor ID value, so the pci_dev_wait() backoff works correctly. When a read of Vendor ID eventually completes successfully by returning a non-0x0001 value (the Vendor ID or 0xffff for VFs), the device should be initialized and ready to respond to config requests. For conventional PCI devices or devices below Root Ports that don't support Configuration RRS Software Visibility, poll the Command register as before. 
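For illustration only (not from this patch), the Vendor ID polling described above boils down to something like the following sketch; the real logic lives in pci_dev_wait() in drivers/pci/pci.c and differs in detail, and the helper name and timeout handling here are made up:

#include <linux/pci.h>
#include <linux/delay.h>

/* Illustrative sketch of waiting out RRS completions via Vendor ID reads. */
static int example_wait_after_reset(struct pci_dev *dev, int timeout_ms)
{
	int delay = 1;
	u32 id;

	for (;;) {
		pci_read_config_dword(dev, PCI_VENDOR_ID, &id);
		/* An RRS completion shows up as the reserved Vendor ID 0x0001. */
		if ((id & 0xffff) != 0x0001)
			return 0;	/* device answered the config read */

		if (delay > timeout_ms)
			return -ENOTTY;

		msleep(delay);
		delay *= 2;		/* exponential backoff, as in pci_dev_wait() */
	}
}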
This was developed independently, but is very similar to Stanislav Spassov's previous work at https://lore.kernel.org/linux-pci/20200223122057.6504-1-stanspas@amazon.com Link: https://lore.kernel.org/r/20240827234848.4429-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Tested-by: Duc Dang --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..121d8d94d6d0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -371,6 +371,7 @@ struct pci_dev { can be generated */ unsigned int pme_poll:1; /* Poll device's PME status bit */ unsigned int pinned:1; /* Whether this dev is pinned */ + unsigned int config_crs_sv:1; /* Config CRS software visibility */ unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ -- cgit v1.2.3 From 87f10faf166a9114aa0d4132298cad379de16fdd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 27 Aug 2024 18:48:48 -0500 Subject: PCI: Rename CRS Completion Status to RRS PCIe r6.0 changed the abbreviation for "Configuration Request Retry Status" Completion Status from "CRS" to "RRS" and uses the terminology of "Configuration RRS Software Visibility" instead of "CRS Software Visibility". Align the Linux usage with the r6.0 spec language. No functional change intended. It's confusing to make this change, but I think "RRS" *is* a better abbreviation because it was easy to interpret "CRS" as "Completion Retry Status", which really didn't make any sense. Link: https://lore.kernel.org/r/20240827234848.4429-4-helgaas@kernel.org Signed-off-by: Bjorn Helgaas --- include/linux/bcma/bcma_driver_pci.h | 2 +- include/linux/pci.h | 2 +- include/uapi/linux/pci_regs.h | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 68da8dba5162..dba41b65ae0d 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -203,7 +203,7 @@ struct pci_dev; #define BCMA_CORE_PCI_MDIO_RXCTRL0 0x840 /* PCIE Root Capability Register bits (Host mode only) */ -#define BCMA_CORE_PCI_RC_CRS_VISIBILITY 0x0001 +#define BCMA_CORE_PCI_RC_RRS_VISIBILITY 0x0001 struct bcma_drv_pci; struct bcma_bus; diff --git a/include/linux/pci.h b/include/linux/pci.h index 121d8d94d6d0..27d8ce977674 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -371,7 +371,7 @@ struct pci_dev { can be generated */ unsigned int pme_poll:1; /* Poll device's PME status bit */ unsigned int pinned:1; /* Whether this dev is pinned */ - unsigned int config_crs_sv:1; /* Config CRS software visibility */ + unsigned int config_rrs_sv:1; /* Config RRS software visibility */ unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 94c00996e633..f94591f9f5e9 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -634,9 +634,11 @@ #define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */ #define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */ #define PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */ -#define PCI_EXP_RTCTL_CRSSVE 0x0010 /* CRS Software Visibility Enable */ 
+#define PCI_EXP_RTCTL_RRS_SVE 0x0010 /* Config RRS Software Visibility Enable */ +#define PCI_EXP_RTCTL_CRSSVE PCI_EXP_RTCTL_RRS_SVE /* compatibility */ #define PCI_EXP_RTCAP 0x1e /* Root Capabilities */ -#define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ +#define PCI_EXP_RTCAP_RRS_SV 0x0001 /* Config RRS Software Visibility */ +#define PCI_EXP_RTCAP_CRSVIS PCI_EXP_RTCAP_RRS_SV /* compatibility */ #define PCI_EXP_RTSTA 0x20 /* Root Status */ #define PCI_EXP_RTSTA_PME_RQ_ID 0x0000ffff /* PME Requester ID */ #define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ -- cgit v1.2.3 From 884ee6dc85b959bc152f15bca80c30f06069e6c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 6 Sep 2024 14:27:24 +0800 Subject: f2fs: get rid of online repair on corrupted directory syzbot reports an f2fs bug as below: kernel BUG at fs/f2fs/inode.c:896! RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Call Trace: evict+0x532/0x950 fs/inode.c:704 dispose_list fs/inode.c:747 [inline] evict_inodes+0x5f9/0x690 fs/inode.c:797 generic_shutdown_super+0x9d/0x2d0 fs/super.c:627 kill_block_super+0x44/0x90 fs/super.c:1696 kill_f2fs_super+0x344/0x690 fs/f2fs/super.c:4898 deactivate_locked_super+0xc4/0x130 fs/super.c:473 cleanup_mnt+0x41f/0x4b0 fs/namespace.c:1373 task_work_run+0x24f/0x310 kernel/task_work.c:228 ptrace_notify+0x2d2/0x380 kernel/signal.c:2402 ptrace_report_syscall include/linux/ptrace.h:415 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:477 [inline] syscall_exit_work+0xc6/0x190 kernel/entry/common.c:173 syscall_exit_to_user_mode_prepare kernel/entry/common.c:200 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:205 [inline] syscall_exit_to_user_mode+0x279/0x370 kernel/entry/common.c:218 do_syscall_64+0x100/0x230 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Online repair of a corrupted directory in f2fs_lookup() can generate dirty data/meta while racing with a read-only remount; it may leave a dirty inode after the filesystem becomes read-only. However, checkpoint() skips flushing dirty inodes in read-only mode, resulting in the above panic. Let's get rid of online repair in f2fs_lookup(), and leave the work to fsck.f2fs.
Fixes: 510022a85839 ("f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries") Reported-by: syzbot+ebea2790904673d7c618@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000a7b20f061ff2d56a@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f17f89a259e2..b0b821edfd97 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -278,7 +278,7 @@ struct node_footer { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ -#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries (obsolete) */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ #define F2FS_PIN_FILE 0x40 /* file should not be gced */ #define F2FS_COMPRESS_RELEASED 0x80 /* file released compressed blocks */ -- cgit v1.2.3 From 540c830212edc908361c39d40df21772a8bda308 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Sep 2024 20:30:18 +0000 Subject: firmware: imx: remove duplicate scmi_imx_misc_ctrl_get() These two functions have a stub definition when CONFIG_IMX_SCMI_MISC_EXT is not set, which conflict with the global definition: In file included from drivers/firmware/imx/sm-misc.c:6: include/linux/firmware/imx/sm.h:30:1: error: expected identifier or '(' before '{' token 30 | { | ^ drivers/firmware/imx/sm-misc.c:26:5: error: redefinition of 'scmi_imx_misc_ctrl_get' 26 | int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) | ^~~~~~~~~~~~~~~~~~~~~~ include/linux/firmware/imx/sm.h:24:19: note: previous definition of 'scmi_imx_misc_ctrl_get' with type 'int(u32, u32 *, u32 *)' {aka 'int(unsigned int, unsigned int *, unsigned int *)'} 24 | static inline int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) | ^~~~~~~~~~~~~~~~~~~~~~ There is no real need for the #ifdef, and removing this avoids the build failure. Fixes: 0b4f8a68b292 ("firmware: imx: Add i.MX95 MISC driver") Link: https://lore.kernel.org/r/20240909203023.1275232-1-arnd@kernel.org Signed-off-by: Arnd Bergmann --- include/linux/firmware/imx/sm.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index 62a2690e2abd..9b85a3f028d1 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -17,18 +17,7 @@ #define SCMI_IMX_CTRL_SAI4_MCLK 4 /* WAKE SAI4 MCLK */ #define SCMI_IMX_CTRL_SAI5_MCLK 5 /* WAKE SAI5 MCLK */ -#if IS_ENABLED(CONFIG_IMX_SCMI_MISC_EXT) int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); int scmi_imx_misc_ctrl_set(u32 id, u32 val); -#else -static inline int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) -{ - return -EOPNOTSUPP; -} -static inline int scmi_imx_misc_ctrl_set(u32 id, u32 val); -{ - return -EOPNOTSUPP; -} -#endif #endif -- cgit v1.2.3 From 6ebf2d021a13a77b495b4de15c6834e26b80d08e Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Tue, 13 Aug 2024 15:43:46 +0100 Subject: sched/deadline: Clarify nanoseconds in uapi Specify the time values of the deadline parameters of deadline, runtime, and period as being in nanoseconds explicitly as they always have been. Signed-off-by: Christian Loehle Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Acked-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20240813144348.1180344-3-christian.loehle@arm.com --- include/uapi/linux/sched/types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h index 90662385689b..bf6e9ae031c1 100644 --- a/include/uapi/linux/sched/types.h +++ b/include/uapi/linux/sched/types.h @@ -58,9 +58,9 @@ * * This is reflected by the following fields of the sched_attr structure: * - * @sched_deadline representative of the task's deadline - * @sched_runtime representative of the task's runtime - * @sched_period representative of the task's period + * @sched_deadline representative of the task's deadline in nanoseconds + * @sched_runtime representative of the task's runtime in nanoseconds + * @sched_period representative of the task's period in nanoseconds * * Given this task model, there are a multiplicity of scheduling algorithms * and policies, that can be used to ensure all the tasks will make their -- cgit v1.2.3 From 4b3fc475c61fa8733a9acc9d743f6cc9ca4915f5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 6 Sep 2024 16:05:07 +0530 Subject: net: phylink: Add phylink_set_fixed_link() to configure fixed link state in phylink The function allows for the configuration of a fixed link state for a given phylink instance. This addition is particularly useful for network devices that operate with a fixed link configuration, where the link parameters do not change dynamically. By using `phylink_set_fixed_link()`, drivers can easily set up the fixed link state during initialization or configuration changes. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: Raju Lakkaraju Signed-off-by: David S. Miller --- include/linux/phylink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2381e07429a2..5c01048860c4 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -598,6 +598,8 @@ int phylink_fwnode_phy_connect(struct phylink *pl, const struct fwnode_handle *fwnode, u32 flags); void phylink_disconnect_phy(struct phylink *); +int phylink_set_fixed_link(struct phylink *, + const struct phylink_link_state *); void phylink_mac_change(struct phylink *, bool up); void phylink_pcs_change(struct phylink_pcs *, bool up); -- cgit v1.2.3 From cef7dde8836ab09a3bfe96ada4f18ef2496eacc9 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 9 Sep 2024 13:04:57 +0300 Subject: net/mlx5: Expand mkey page size to support 6 bits Protect the usage of the 6th bit with the relevant capability to ensure we are using the new page sizes with FW that supports the bit extension. 
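As a rough sketch of what such gating could look like (not the actual driver code; the helper below is made up, while MLX5_CAP_GEN_2() and MLX5_SET() are existing mlx5 accessors):

#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>

/* Illustrative sketch: only use the 6th log_page_size bit when FW supports it. */
static void example_set_mkc_log_page_size(struct mlx5_core_dev *mdev,
					  void *mkc, u32 log_page_size)
{
	/* Values above 31 need the extended 6-bit field. */
	if (log_page_size > 31 && !MLX5_CAP_GEN_2(mdev, umr_log_entity_size_5))
		log_page_size = 31;

	MLX5_SET(mkc, mkc, log_page_size, log_page_size);
}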
Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909100504.29797-2-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 970c9d8473ef..ec1117d4e441 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1988,7 +1988,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 migratable[0x1]; u8 reserved_at_81[0x11]; u8 query_vuid[0x1]; - u8 reserved_at_93[0xd]; + u8 reserved_at_93[0x5]; + u8 umr_log_entity_size_5[0x1]; + u8 reserved_at_99[0x7]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; @@ -4212,8 +4214,7 @@ struct mlx5_ifc_mkc_bits { u8 reserved_at_1c0[0x19]; u8 relaxed_ordering_read[0x1]; - u8 reserved_at_1d9[0x1]; - u8 log_page_size[0x5]; + u8 log_page_size[0x6]; u8 reserved_at_1e0[0x20]; }; -- cgit v1.2.3 From 6cd9171d04cff79abe78c166927ab8563bf95fe5 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 9 Sep 2024 13:04:58 +0300 Subject: net/mlx5: Expose HW bits for Memory scheme ODP Expose IFC bits to support the new memory scheme on demand paging. Change the macro reading odp capabilities to be able to read from the new IFC layout and align the code in upper layers to be compiled. Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909100504.29797-3-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 4 +++ include/linux/mlx5/mlx5_ifc.h | 57 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ba875a619b97..bd081f276654 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1369,6 +1369,10 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) +#define MLX5_CAP_ODP_SCHEME(mdev, cap) \ + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + transport_page_fault_scheme_cap.cap) + #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ec1117d4e441..3e3336bb9191 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1326,11 +1326,13 @@ struct mlx5_ifc_atomic_caps_bits { u8 reserved_at_e0[0x720]; }; -struct mlx5_ifc_odp_cap_bits { +struct mlx5_ifc_odp_scheme_cap_bits { u8 reserved_at_0[0x40]; u8 sig[0x1]; - u8 reserved_at_41[0x1f]; + u8 reserved_at_41[0x4]; + u8 page_prefetch[0x1]; + u8 reserved_at_46[0x1a]; u8 reserved_at_60[0x20]; @@ -1344,7 +1346,20 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits dc_odp_caps; - u8 reserved_at_120[0x6E0]; + u8 reserved_at_120[0xe0]; +}; + +struct mlx5_ifc_odp_cap_bits { + struct mlx5_ifc_odp_scheme_cap_bits transport_page_fault_scheme_cap; + + struct mlx5_ifc_odp_scheme_cap_bits memory_page_fault_scheme_cap; + + u8 reserved_at_400[0x200]; + + u8 mem_page_fault[0x1]; + u8 reserved_at_601[0x1f]; + + u8 reserved_at_620[0x1e0]; }; struct mlx5_ifc_tls_cap_bits { @@ -2034,7 +2049,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; - u8 reserved_at_3a0[0x10]; + u8 reserved_at_3a0[0xa]; + u8 max_mkey_log_entity_size_mtt[0x6]; u8 max_rqt_vhca_id[0x10]; u8 
reserved_at_3c0[0x20]; @@ -7258,6 +7274,30 @@ struct mlx5_ifc_qp_2err_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_trans_page_fault_info_bits { + u8 error[0x1]; + u8 reserved_at_1[0x4]; + u8 page_fault_type[0x3]; + u8 wq_number[0x18]; + + u8 reserved_at_20[0x8]; + u8 fault_token[0x18]; +}; + +struct mlx5_ifc_mem_page_fault_info_bits { + u8 error[0x1]; + u8 reserved_at_1[0xf]; + u8 fault_token_47_32[0x10]; + + u8 fault_token_31_0[0x20]; +}; + +union mlx5_ifc_page_fault_resume_in_page_fault_info_auto_bits { + struct mlx5_ifc_trans_page_fault_info_bits trans_page_fault_info; + struct mlx5_ifc_mem_page_fault_info_bits mem_page_fault_info; + u8 reserved_at_0[0x40]; +}; + struct mlx5_ifc_page_fault_resume_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7274,13 +7314,8 @@ struct mlx5_ifc_page_fault_resume_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 error[0x1]; - u8 reserved_at_41[0x4]; - u8 page_fault_type[0x3]; - u8 wq_number[0x18]; - - u8 reserved_at_60[0x8]; - u8 token[0x18]; + union mlx5_ifc_page_fault_resume_in_page_fault_info_auto_bits + page_fault_info; }; struct mlx5_ifc_nop_out_bits { -- cgit v1.2.3 From 64c68385a39bb676c76da36164ab696e8da78842 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 9 Sep 2024 13:04:59 +0300 Subject: RDMA/mlx5: Add new ODP memory scheme eqe format Add new fields to support the new memory scheme page fault and extend the token field to u64 as in the new scheme the token is 48 bit. Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909100504.29797-4-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index bd081f276654..154095256d0d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -211,6 +211,7 @@ enum { enum { MLX5_PFAULT_SUBTYPE_WQE = 0, MLX5_PFAULT_SUBTYPE_RDMA = 1, + MLX5_PFAULT_SUBTYPE_MEMORY = 2, }; enum wqe_page_fault_type { @@ -646,10 +647,11 @@ struct mlx5_eqe_page_req { __be32 rsvd1[5]; }; +#define MEMORY_SCHEME_PAGE_FAULT_GRANULARITY 4096 struct mlx5_eqe_page_fault { - __be32 bytes_committed; union { struct { + __be32 bytes_committed; u16 reserved1; __be16 wqe_index; u16 reserved2; @@ -659,6 +661,7 @@ struct mlx5_eqe_page_fault { __be32 pftype_wq; } __packed wqe; struct { + __be32 bytes_committed; __be32 r_key; u16 reserved1; __be16 packet_length; @@ -666,6 +669,23 @@ struct mlx5_eqe_page_fault { __be64 rdma_va; __be32 pftype_token; } __packed rdma; + struct { + u8 flags; + u8 reserved1; + __be16 post_demand_fault_pages; + __be16 pre_demand_fault_pages; + __be16 token47_32; + __be32 token31_0; + /* + * FW changed from specifying the fault size in byte + * count to 4k pages granularity. The size specified + * in pages uses bits 31:12, to keep backward + * compatibility. + */ + __be32 demand_fault_pages; + __be32 mkey; + __be64 va; + } __packed memory; } __packed; } __packed; -- cgit v1.2.3 From c912ac66b3fc92acb0079fff7d030e9be0756c36 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 9 Sep 2024 15:41:05 +0300 Subject: platform/x86: intel_scu_ipc: Move intel_scu_ipc.h out of arch/x86/include/asm This is a platform/x86 library that is mostly being used by other drivers not directly under arch/x86 anyway (with the exception of the Intel MID setup code) so it makes sense that it lives under the platform_data/x86/ directory instead. No functional changes intended. 
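For users of the library the change is only the include path; illustratively:

/* Before (header lived in arch/x86/include/asm): */
#include <asm/intel_scu_ipc.h>

/* After this change: */
#include <linux/platform_data/x86/intel_scu_ipc.h>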
Suggested-by: Andy Shevchenko Signed-off-by: Mika Westerberg Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240909124952.1152017-3-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/intel_scu_ipc.h | 68 +++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 include/linux/platform_data/x86/intel_scu_ipc.h (limited to 'include') diff --git a/include/linux/platform_data/x86/intel_scu_ipc.h b/include/linux/platform_data/x86/intel_scu_ipc.h new file mode 100644 index 000000000000..0ca9962e97f2 --- /dev/null +++ b/include/linux/platform_data/x86/intel_scu_ipc.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PLATFORM_X86_INTEL_SCU_IPC_H_ +#define __PLATFORM_X86_INTEL_SCU_IPC_H_ + +#include + +struct device; +struct intel_scu_ipc_dev; + +/** + * struct intel_scu_ipc_data - Data used to configure SCU IPC + * @mem: Base address of SCU IPC MMIO registers + * @irq: The IRQ number used for SCU (optional) + */ +struct intel_scu_ipc_data { + struct resource mem; + int irq; +}; + +struct intel_scu_ipc_dev * +__intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define intel_scu_ipc_register(parent, scu_data) \ + __intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu); + +struct intel_scu_ipc_dev * +__devm_intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define devm_intel_scu_ipc_register(parent, scu_data) \ + __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void); +void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu); +struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev); + +int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 *data); +int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data); +int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); +int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); + +int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data, u8 mask); + +int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub); +int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + size_t size, void *out, size_t outlen); + +static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + void *out, size_t outlen) +{ + return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen, + inlen, out, outlen); +} + +#endif -- cgit v1.2.3 From 5f1cda51107fd5e53cd012bd1f6f39aede96bdbe Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 9 Sep 2024 15:41:06 +0300 Subject: platform/x86: intel_scu_wdt: Move intel_scu_wdt.h to x86 subfolder This is a platform/x86 library that can only be used on x86 devices. so it makes sense that it lives under the platform_data/x86/ directory instead. No functional changes intended. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240909124952.1152017-4-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/intel-mid_wdt.h | 19 ------------------- include/linux/platform_data/x86/intel-mid_wdt.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) delete mode 100644 include/linux/platform_data/intel-mid_wdt.h create mode 100644 include/linux/platform_data/x86/intel-mid_wdt.h (limited to 'include') diff --git a/include/linux/platform_data/intel-mid_wdt.h b/include/linux/platform_data/intel-mid_wdt.h deleted file mode 100644 index 8dba70b4b020..000000000000 --- a/include/linux/platform_data/intel-mid_wdt.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * intel-mid_wdt: generic Intel MID SCU watchdog driver - * - * Copyright (C) 2014 Intel Corporation. All rights reserved. - * Contact: David Cohen - */ - -#ifndef __INTEL_MID_WDT_H__ -#define __INTEL_MID_WDT_H__ - -#include - -struct intel_mid_wdt_pdata { - int irq; - int (*probe)(struct platform_device *pdev); -}; - -#endif /*__INTEL_MID_WDT_H__*/ diff --git a/include/linux/platform_data/x86/intel-mid_wdt.h b/include/linux/platform_data/x86/intel-mid_wdt.h new file mode 100644 index 000000000000..e5c0210d0fec --- /dev/null +++ b/include/linux/platform_data/x86/intel-mid_wdt.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * intel-mid_wdt: generic Intel MID SCU watchdog driver + * + * Copyright (C) 2014 Intel Corporation. All rights reserved. + * Contact: David Cohen + */ + +#ifndef __PLATFORM_X86_INTEL_MID_WDT_H_ +#define __PLATFORM_X86_INTEL_MID_WDT_H_ + +#include + +struct intel_mid_wdt_pdata { + int irq; + int (*probe)(struct platform_device *pdev); +}; + +#endif /* __PLATFORM_X86_INTEL_MID_WDT_H_ */ -- cgit v1.2.3 From d3bfbfb1248498656cd25c51e41c1e31219bd0dd Mon Sep 17 00:00:00 2001 From: Kundan Kumar Date: Wed, 11 Sep 2024 12:19:34 +0530 Subject: mm: release number of pages of a folio Add a new function unpin_user_folio() to put the refs of a folio by npages count. The check for BIO_PAGE_PINNED flag is removed as it is already checked in bio_release_pages(). Signed-off-by: Kundan Kumar Tested-by: Luis Chamberlain Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20240911064935.5630-4-kundan.kumar@samsung.com Signed-off-by: Jens Axboe --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..2fd88cd5997d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1597,6 +1597,7 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty); void unpin_user_pages(struct page **pages, unsigned long npages); +void unpin_user_folio(struct folio *folio, unsigned long npages); void unpin_folios(struct folio **folios, unsigned long nfolios); static inline bool is_cow_mapping(vm_flags_t flags) -- cgit v1.2.3 From b03ffc76b83c1a7d058454efbcf1bf0e345ef1c2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 6 Sep 2024 15:13:31 +0200 Subject: soc: qcom: geni-se: add GP_LENGTH/IRQ_EN_SET/IRQ_EN_CLEAR registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For UART devices the M_GP_LENGTH is the TX word count. For other devices this is the transaction word count. 
For UART devices the S_GP_LENGTH is the RX word count. The IRQ_EN set/clear registers allow you to set or clear bits in the IRQ_EN register without needing a read-modify-write. Acked-by: Bjorn Andersson Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240610152420.v4.1.Ife7ced506aef1be3158712aa3ff34a006b973559@changeid Tested-by: Nícolas F. R. A. Prado Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240906131336.23625-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/soc/qcom/geni-se.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index 0f038a1a0330..c3bca9c0bf2c 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -88,11 +88,15 @@ struct geni_se { #define SE_GENI_M_IRQ_STATUS 0x610 #define SE_GENI_M_IRQ_EN 0x614 #define SE_GENI_M_IRQ_CLEAR 0x618 +#define SE_GENI_M_IRQ_EN_SET 0x61c +#define SE_GENI_M_IRQ_EN_CLEAR 0x620 #define SE_GENI_S_CMD0 0x630 #define SE_GENI_S_CMD_CTRL_REG 0x634 #define SE_GENI_S_IRQ_STATUS 0x640 #define SE_GENI_S_IRQ_EN 0x644 #define SE_GENI_S_IRQ_CLEAR 0x648 +#define SE_GENI_S_IRQ_EN_SET 0x64c +#define SE_GENI_S_IRQ_EN_CLEAR 0x650 #define SE_GENI_TX_FIFOn 0x700 #define SE_GENI_RX_FIFOn 0x780 #define SE_GENI_TX_FIFO_STATUS 0x800 @@ -101,6 +105,8 @@ struct geni_se { #define SE_GENI_RX_WATERMARK_REG 0x810 #define SE_GENI_RX_RFR_WATERMARK_REG 0x814 #define SE_GENI_IOS 0x908 +#define SE_GENI_M_GP_LENGTH 0x910 +#define SE_GENI_S_GP_LENGTH 0x914 #define SE_DMA_TX_IRQ_STAT 0xc40 #define SE_DMA_TX_IRQ_CLR 0xc44 #define SE_DMA_TX_FSM_RST 0xc58 @@ -234,6 +240,9 @@ struct geni_se { #define IO2_DATA_IN BIT(1) #define RX_DATA_IN BIT(0) +/* SE_GENI_M_GP_LENGTH and SE_GENI_S_GP_LENGTH fields */ +#define GP_LENGTH GENMASK(31, 0) + /* SE_DMA_TX_IRQ_STAT Register fields */ #define TX_DMA_DONE BIT(0) #define TX_EOT BIT(1) -- cgit v1.2.3 From 4c59c59ef3ab9275f2a8f84dcffbd16c285ce8ea Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Wed, 11 Sep 2024 13:02:11 +0800 Subject: tty: serial: samsung: Use bit manipulation macros for APPLE_S5L_* New entries using BIT() will be added soon, so change the existing ones to use bit manipulation macros including BIT() and GENMASK() for consistency. 
Suggested-by: Krzysztof Kozlowski Tested-by: Janne Grunau Reviewed-by: Neal Gompa Signed-off-by: Nick Chan Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20240911050741.14477-2-towinchenmi@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_s3c.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index 1672cf0810ef..2a934e20ca4b 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -249,9 +249,9 @@ #define APPLE_S5L_UCON_RXTO_ENA 9 #define APPLE_S5L_UCON_RXTHRESH_ENA 12 #define APPLE_S5L_UCON_TXTHRESH_ENA 13 -#define APPLE_S5L_UCON_RXTO_ENA_MSK (1 << APPLE_S5L_UCON_RXTO_ENA) -#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_RXTHRESH_ENA) -#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_TXTHRESH_ENA) +#define APPLE_S5L_UCON_RXTO_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_ENA) +#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_RXTHRESH_ENA) +#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_TXTHRESH_ENA) #define APPLE_S5L_UCON_DEFAULT (S3C2410_UCON_TXIRQMODE | \ S3C2410_UCON_RXIRQMODE | \ @@ -260,10 +260,10 @@ APPLE_S5L_UCON_RXTHRESH_ENA_MSK | \ APPLE_S5L_UCON_TXTHRESH_ENA_MSK) -#define APPLE_S5L_UTRSTAT_RXTHRESH (1<<4) -#define APPLE_S5L_UTRSTAT_TXTHRESH (1<<5) -#define APPLE_S5L_UTRSTAT_RXTO (1<<9) -#define APPLE_S5L_UTRSTAT_ALL_FLAGS (0x3f0) +#define APPLE_S5L_UTRSTAT_RXTHRESH BIT(4) +#define APPLE_S5L_UTRSTAT_TXTHRESH BIT(5) +#define APPLE_S5L_UTRSTAT_RXTO BIT(9) +#define APPLE_S5L_UTRSTAT_ALL_FLAGS GENMASK(9, 4) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 5ed771f174726ae879945d4f148a9005ac909cb7 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Wed, 11 Sep 2024 13:02:13 +0800 Subject: tty: serial: samsung: Fix serial rx on Apple A7-A9 Apple's older A7-A9 SoCs seems to use bit 3 in UTRSTAT as RXTO, which is enabled by bit 11 in UCON. Access these bits in addition to the original RXTO and RXTO enable bits, to allow serial rx to function on A7-A9 SoCs. This change does not appear to affect the A10 SoC and up. 
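As a hedged sketch (not the driver change itself), an interrupt path that wants to treat either receive-timeout status bit as the same event could simply check both flags:

#include <linux/serial_s3c.h>

/* Illustrative: handle both the legacy (A7-A9) and newer RX-timeout bits. */
static inline bool example_s5l_rx_timeout(u32 utrstat)
{
	return utrstat & (APPLE_S5L_UTRSTAT_RXTO |
			  APPLE_S5L_UTRSTAT_RXTO_LEGACY);
}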
Tested-by: Janne Grunau Reviewed-by: Neal Gompa Signed-off-by: Nick Chan Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20240911050741.14477-4-towinchenmi@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_s3c.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index 2a934e20ca4b..102aa33d956c 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -246,24 +246,28 @@ S5PV210_UFCON_TXTRIG4 | \ S5PV210_UFCON_RXTRIG4) -#define APPLE_S5L_UCON_RXTO_ENA 9 -#define APPLE_S5L_UCON_RXTHRESH_ENA 12 -#define APPLE_S5L_UCON_TXTHRESH_ENA 13 -#define APPLE_S5L_UCON_RXTO_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_ENA) -#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_RXTHRESH_ENA) -#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_TXTHRESH_ENA) +#define APPLE_S5L_UCON_RXTO_ENA 9 +#define APPLE_S5L_UCON_RXTO_LEGACY_ENA 11 +#define APPLE_S5L_UCON_RXTHRESH_ENA 12 +#define APPLE_S5L_UCON_TXTHRESH_ENA 13 +#define APPLE_S5L_UCON_RXTO_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_ENA) +#define APPLE_S5L_UCON_RXTO_LEGACY_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_LEGACY_ENA) +#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_RXTHRESH_ENA) +#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_TXTHRESH_ENA) #define APPLE_S5L_UCON_DEFAULT (S3C2410_UCON_TXIRQMODE | \ S3C2410_UCON_RXIRQMODE | \ S3C2410_UCON_RXFIFO_TOI) #define APPLE_S5L_UCON_MASK (APPLE_S5L_UCON_RXTO_ENA_MSK | \ + APPLE_S5L_UCON_RXTO_LEGACY_ENA_MSK | \ APPLE_S5L_UCON_RXTHRESH_ENA_MSK | \ APPLE_S5L_UCON_TXTHRESH_ENA_MSK) +#define APPLE_S5L_UTRSTAT_RXTO_LEGACY BIT(3) #define APPLE_S5L_UTRSTAT_RXTHRESH BIT(4) #define APPLE_S5L_UTRSTAT_TXTHRESH BIT(5) #define APPLE_S5L_UTRSTAT_RXTO BIT(9) -#define APPLE_S5L_UTRSTAT_ALL_FLAGS GENMASK(9, 4) +#define APPLE_S5L_UTRSTAT_ALL_FLAGS GENMASK(9, 3) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 9a26234423b87e111351eac0e95775e6ba14d752 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Sep 2024 15:57:52 +0200 Subject: ALSA: pcm: Fix breakage of PCM rates used for topology It turned out that the topology ABI takes the standard PCM rate bits as is, and it means that the recent change of the PCM rate bits would lead to the inconsistent rate values used for topology. This patch reverts the original PCM rate bit definitions while adding the new rates to the extended bits instead. This needed the change of snd_pcm_known_rates, too. And this also required to fix the handling in snd_pcm_hw_limit_rates() that blindly assumed that the list is sorted while it became unsorted now. 
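To make the "unsorted list" point concrete, here is a hedged sketch of the idea behind the snd_pcm_hw_limit_rates() fix (the real code in sound/core/pcm_misc.c differs): derive the minimum and maximum by scanning every set rate bit instead of trusting the first and last list entries.

#include <sound/pcm.h>
#include <linux/limits.h>
#include <linux/minmax.h>

/* Illustrative: compute rate_min/rate_max from a possibly unsorted rate table. */
static void example_limit_rates(const struct snd_pcm_hw_constraint_list *list,
				unsigned int rates, unsigned int *rate_min,
				unsigned int *rate_max)
{
	unsigned int i;

	*rate_min = UINT_MAX;
	*rate_max = 0;

	for (i = 0; i < list->count; i++) {
		if (!(rates & (1U << i)))
			continue;
		*rate_min = min(*rate_min, list->list[i]);
		*rate_max = max(*rate_max, list->list[i]);
	}
}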
Fixes: 090624b7dc83 ("ALSA: pcm: add more sample rate definitions") Reported-by: Pierre-Louis Bossart Closes: https://lore.kernel.org/1ab3efaa-863c-4dd0-8f81-b50fd9775fad@linux.intel.com Reviewed-by: Jaroslav Kysela Tested-by: Jerome Brunet Tested-by: Bard Liao Link: https://patch.msgid.link/20240911135756.24434-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index c993350975a9..0bf7d25434d7 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -109,23 +109,24 @@ struct snd_pcm_ops { #define SNDRV_PCM_RATE_5512 (1U<<0) /* 5512Hz */ #define SNDRV_PCM_RATE_8000 (1U<<1) /* 8000Hz */ #define SNDRV_PCM_RATE_11025 (1U<<2) /* 11025Hz */ -#define SNDRV_PCM_RATE_12000 (1U<<3) /* 12000Hz */ -#define SNDRV_PCM_RATE_16000 (1U<<4) /* 16000Hz */ -#define SNDRV_PCM_RATE_22050 (1U<<5) /* 22050Hz */ -#define SNDRV_PCM_RATE_24000 (1U<<6) /* 24000Hz */ -#define SNDRV_PCM_RATE_32000 (1U<<7) /* 32000Hz */ -#define SNDRV_PCM_RATE_44100 (1U<<8) /* 44100Hz */ -#define SNDRV_PCM_RATE_48000 (1U<<9) /* 48000Hz */ -#define SNDRV_PCM_RATE_64000 (1U<<10) /* 64000Hz */ -#define SNDRV_PCM_RATE_88200 (1U<<11) /* 88200Hz */ -#define SNDRV_PCM_RATE_96000 (1U<<12) /* 96000Hz */ -#define SNDRV_PCM_RATE_128000 (1U<<13) /* 128000Hz */ -#define SNDRV_PCM_RATE_176400 (1U<<14) /* 176400Hz */ -#define SNDRV_PCM_RATE_192000 (1U<<15) /* 192000Hz */ -#define SNDRV_PCM_RATE_352800 (1U<<16) /* 352800Hz */ -#define SNDRV_PCM_RATE_384000 (1U<<17) /* 384000Hz */ -#define SNDRV_PCM_RATE_705600 (1U<<18) /* 705600Hz */ -#define SNDRV_PCM_RATE_768000 (1U<<19) /* 768000Hz */ +#define SNDRV_PCM_RATE_16000 (1U<<3) /* 16000Hz */ +#define SNDRV_PCM_RATE_22050 (1U<<4) /* 22050Hz */ +#define SNDRV_PCM_RATE_32000 (1U<<5) /* 32000Hz */ +#define SNDRV_PCM_RATE_44100 (1U<<6) /* 44100Hz */ +#define SNDRV_PCM_RATE_48000 (1U<<7) /* 48000Hz */ +#define SNDRV_PCM_RATE_64000 (1U<<8) /* 64000Hz */ +#define SNDRV_PCM_RATE_88200 (1U<<9) /* 88200Hz */ +#define SNDRV_PCM_RATE_96000 (1U<<10) /* 96000Hz */ +#define SNDRV_PCM_RATE_176400 (1U<<11) /* 176400Hz */ +#define SNDRV_PCM_RATE_192000 (1U<<12) /* 192000Hz */ +#define SNDRV_PCM_RATE_352800 (1U<<13) /* 352800Hz */ +#define SNDRV_PCM_RATE_384000 (1U<<14) /* 384000Hz */ +#define SNDRV_PCM_RATE_705600 (1U<<15) /* 705600Hz */ +#define SNDRV_PCM_RATE_768000 (1U<<16) /* 768000Hz */ +/* extended rates since 6.12 */ +#define SNDRV_PCM_RATE_12000 (1U<<17) /* 12000Hz */ +#define SNDRV_PCM_RATE_24000 (1U<<18) /* 24000Hz */ +#define SNDRV_PCM_RATE_128000 (1U<<19) /* 128000Hz */ #define SNDRV_PCM_RATE_CONTINUOUS (1U<<30) /* continuous range */ #define SNDRV_PCM_RATE_KNOT (1U<<31) /* supports more non-continuous rates */ -- cgit v1.2.3 From 01ced022e125f7b328335bb2944a107afcafe351 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 3 Sep 2024 15:32:16 -0500 Subject: ACPI: CPPC: Adjust return code for inline functions in !CONFIG_ACPI_CPPC_LIB Checkpath emits the following warning: ``` WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP ``` Adjust the code accordingly. Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Mario Limonciello --- include/acpi/cppc_acpi.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 930b6afba6f4..409a7190a4a8 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -162,31 +162,31 @@ extern int cppc_set_auto_sel(int cpu, bool enable); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_enable(int cpu, bool enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline bool cppc_perf_ctrs_in_pcc(void) { @@ -210,27 +210,27 @@ static inline bool cpc_ffh_supported(void) } static inline int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_auto_sel(int cpu, bool enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps) { - return -ENOTSUPP; + return -EOPNOTSUPP; } #endif /* !CONFIG_ACPI_CPPC_LIB */ -- cgit v1.2.3 From 6c09e3b445a1a647a5b57ea6afd23e846225dd8f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:52 -0500 Subject: x86/amd: Rename amd_get_highest_perf() to amd_get_boost_ratio_numerator() The function name is ambiguous because it returns an intermediate value for calculating maximum frequency rather than the CPPC 'Highest Perf' register. Rename the function to clarify its use and allow the function to return errors. Adjust the consumer in acpi-cpufreq to catch errors. Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Mario Limonciello --- include/acpi/cppc_acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 409a7190a4a8..97861abc5f5b 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -159,6 +159,7 @@ extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { @@ -232,6 +233,10 @@ static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf { return -EOPNOTSUPP; } +static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) +{ + return -EOPNOTSUPP; +} #endif /* !CONFIG_ACPI_CPPC_LIB */ #endif /* _CPPC_ACPI_H*/ -- cgit v1.2.3 From 2819bfef6483c66c55064ca678f2630a1a09f3f9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:54 -0500 Subject: x86/amd: Move amd_get_highest_perf() out of amd-pstate amd_pstate_get_highest_perf() is a helper used to get the highest perf value on AMD systems. It's used in amd-pstate as part of preferred core handling, but applicable for acpi-cpufreq as well. Move it out to cppc handling code as amd_get_highest_perf(). Reviewed-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- include/acpi/cppc_acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 97861abc5f5b..f7c7abf2a95e 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -159,6 +159,7 @@ extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) @@ -233,6 +234,10 @@ static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf { return -EOPNOTSUPP; } +static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) +{ + return -ENODEV; +} static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) { return -EOPNOTSUPP; -- cgit v1.2.3 From 279f838a61f96cbfeb1f9ba060e4a452e6e041d0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:55 -0500 Subject: x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator() AMD systems that support preferred cores will use "166" as their numerator for max frequency calculations instead of "255". Add a function for detecting preferred cores by looking at the highest perf value on all cores. If preferred cores are enabled return 166 and if disabled the value in the highest perf register. As the function will be called multiple times, cache the values for the boost numerator and if preferred cores will be enabled in global variables. Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Mario Limonciello --- include/acpi/cppc_acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index f7c7abf2a95e..482e0587a041 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -161,6 +161,7 @@ extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); extern int cppc_set_auto_sel(int cpu, bool enable); extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); +extern int amd_detect_prefcore(bool *detected); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { @@ -242,6 +243,10 @@ static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator { return -EOPNOTSUPP; } +static inline int amd_detect_prefcore(bool *detected) +{ + return -ENODEV; +} #endif /* !CONFIG_ACPI_CPPC_LIB */ #endif /* _CPPC_ACPI_H*/ -- cgit v1.2.3 From edd3f6f7588c713477e1299c38c84dcd91a7f148 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Wed, 11 Sep 2024 11:37:17 +0800 Subject: tcp: Use skb__nullable in trace_tcp_send_reset Replace skb with skb__nullable as the argument name. The suffix tells bpf verifier through btf that the arg could be NULL and should be checked in tp_btf prog. For now, this is the only nullable argument in tcp tracepoints. Signed-off-by: Philo Lu Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240911033719.91468-4-lulie@linux.alibaba.com Signed-off-by: Martin KaFai Lau --- include/trace/events/tcp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 1c8bd8e186b8..a27c4b619dff 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -91,10 +91,10 @@ DEFINE_RST_REASON(FN, FN) TRACE_EVENT(tcp_send_reset, TP_PROTO(const struct sock *sk, - const struct sk_buff *skb, + const struct sk_buff *skb__nullable, const enum sk_rst_reason reason), - TP_ARGS(sk, skb, reason), + TP_ARGS(sk, skb__nullable, reason), TP_STRUCT__entry( __field(const void *, skbaddr) @@ -106,7 +106,7 @@ TRACE_EVENT(tcp_send_reset, ), TP_fast_assign( - __entry->skbaddr = skb; + __entry->skbaddr = skb__nullable; __entry->skaddr = sk; /* Zero means unknown state. */ __entry->state = sk ? sk->sk_state : 0; @@ -118,13 +118,13 @@ TRACE_EVENT(tcp_send_reset, const struct inet_sock *inet = inet_sk(sk); TP_STORE_ADDR_PORTS(__entry, inet, sk); - } else if (skb) { - const struct tcphdr *th = (const struct tcphdr *)skb->data; + } else if (skb__nullable) { + const struct tcphdr *th = (const struct tcphdr *)skb__nullable->data; /* * We should reverse the 4-tuple of skb, so later * it can print the right flow direction of rst. */ - TP_STORE_ADDR_PORTS_SKB(skb, th, entry->daddr, entry->saddr); + TP_STORE_ADDR_PORTS_SKB(skb__nullable, th, entry->daddr, entry->saddr); } __entry->reason = reason; ), -- cgit v1.2.3 From 6746ee4c3a189f8b60694f01e7e29bc5ff7972e0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 11 Sep 2024 17:34:37 +0100 Subject: io_uring/cmd: expose iowq to cmds When an io_uring request needs blocking context we offload it to the io_uring's thread pool called io-wq. We can get there off ->uring_cmd by returning -EAGAIN, but there is no straightforward way of doing that from an asynchronous callback. Add a helper that would transfer a command to a blocking context. 
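As a rough usage sketch (not from this patch; the callback below is made up), an asynchronous completion path that realizes it needs to block can now punt the command to io-wq:

#include <linux/io_uring/cmd.h>

/* Illustrative: called from a context where blocking is not allowed. */
static void example_cmd_async_done(struct io_uring_cmd *ioucmd)
{
	/* Re-issue the command from the io-wq blocking context. */
	io_uring_cmd_issue_blocking(ioucmd);
}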
Note, we do an extra hop via task_work before io_queue_iowq(); that's a limitation of the current io_uring infrastructure, which can likely be lifted later if it ever becomes a problem. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f735f807d7c8ba50c9452c69dfe5d3e9e535037b.1726072086.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 447fbfd32215..86ceb3383e49 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -48,6 +48,9 @@ void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags); +/* Execute the request from a blocking context */ +void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd); + #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) @@ -67,6 +70,9 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags) { } +static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd) +{ +} #endif /* -- cgit v1.2.3 From a6ccb48e13662bcb98282e051512b9686b02d353 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 11 Sep 2024 17:34:38 +0100 Subject: io_uring/cmd: give inline space in request to cmds Some io_uring commands can use some inline space in io_kiocb. We have 32 bytes in struct io_uring_cmd, expose it. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7ca779a61ee5e166e535d70df9c7f07b15d8a0ce.1726072086.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 86ceb3383e49..c189d36ad55e 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -23,6 +23,15 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) return sqe->cmd; } +static inline void io_uring_cmd_private_sz_check(size_t cmd_sz) +{ + BUILD_BUG_ON(cmd_sz > sizeof_field(struct io_uring_cmd, pdu)); +} +#define io_uring_cmd_to_pdu(cmd, pdu_type) ( \ + io_uring_cmd_private_sz_check(sizeof(pdu_type)), \ + ((pdu_type *)&(cmd)->pdu) \ +) + #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); -- cgit v1.2.3 From a12c883a0a6a005cfb3ad01feaf783e2248bfc3e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 11 Sep 2024 17:34:39 +0100 Subject: filemap: introduce filemap_invalidate_pages kiocb_invalidate_pages() is useful for the write path; however, not everything is backed by a kiocb, and we want to reuse the function for the bio-based discard implementation. Extract and reuse a new helper called filemap_invalidate_pages(), which takes an argument indicating whether it should be non-blocking and might return -EAGAIN.
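A hedged sketch of how a non-kiocb caller might use it (the wrapper is made up; the signature matches the declaration below):

#include <linux/pagemap.h>

/* Illustrative: drop page cache over [pos, pos + len) before a discard-style op. */
static int example_invalidate_range(struct address_space *mapping,
				    loff_t pos, loff_t len, bool nowait)
{
	/* May return -EAGAIN only when nowait is true; retry from a blocking context. */
	return filemap_invalidate_pages(mapping, pos, pos + len - 1, nowait);
}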
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f81374b52c92d0dce0f01a279d1eed42b54056aa.1726072086.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9c7edb6422b..e39c3a7ce33c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -32,6 +32,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int kiocb_invalidate_pages(struct kiocb *iocb, size_t count); void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count); +int filemap_invalidate_pages(struct address_space *mapping, + loff_t pos, loff_t end, bool nowait); int write_inode_now(struct inode *, int sync); int filemap_fdatawrite(struct address_space *); -- cgit v1.2.3 From 50c52250e2d74b098465841163c18f4b4e9ad430 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 11 Sep 2024 17:34:41 +0100 Subject: block: implement async io_uring discard cmd io_uring allows implementing custom file specific asynchronous operations via the fops->uring_cmd callback, a.k.a. IORING_OP_URING_CMD requests or just io_uring commands. Use it to add support for async discards. Normally, it first tries to queue up bios in a non-blocking context, and if that fails, we'd retry from a blocking context by returning -EAGAIN to the core io_uring. We always get the result from bios asynchronously by setting a custom bi_end_io callback, at which point we drag the request into the task context to either reissue or complete it and post a completion to the user. Unlike ioctl(BLKDISCARD) with stronger guarantees against races, we only do a best effort attempt to invalidate page cache, and it can race with any writes and reads and leave page cache stale. It's the same kind of races we allow to direct writes. Also, apart from cases where discarding is not allowed at all, e.g. discards are not supported or the file/device is read only, the user should assume that the sector range on disk is not valid anymore, even when an error was returned to the user. Suggested-by: Conrad Meyer Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/2b5210443e4fa0257934f73dfafcc18a77cd0e09.1726072086.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/blkdev.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/uapi/linux/blkdev.h (limited to 'include') diff --git a/include/uapi/linux/blkdev.h b/include/uapi/linux/blkdev.h new file mode 100644 index 000000000000..66373cd1a83a --- /dev/null +++ b/include/uapi/linux/blkdev.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_BLKDEV_H +#define _UAPI_LINUX_BLKDEV_H + +#include +#include + +/* + * io_uring block file commands, see IORING_OP_URING_CMD. + * It's a different number space from ioctl(), reuse the block's code 0x12. + */ +#define BLOCK_URING_CMD_DISCARD _IO(0x12, 0) + +#endif -- cgit v1.2.3 From 45b8fc3096542a53bfd245a9ad8ef870384b4897 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 29 Aug 2024 10:42:27 -0700 Subject: lib/buildid: rename build_id_parse() into build_id_parse_nofault() Make it clear that build_id_parse() assumes that it can take no page fault by renaming it and current few users to build_id_parse_nofault(). 
Also add build_id_parse() stub which for now falls back to the non-sleepable implementation, but will be changed in subsequent patches to take advantage of the sleepable context. PROCMAP_QUERY ioctl() on /proc/<pid>/maps file uses build_id_parse() and will automatically take advantage of the more reliable sleepable context implementation. Reviewed-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240829174232.3133883-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/buildid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 20aa3c2d89f7..014a88c41073 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -7,8 +7,8 @@ #define BUILD_ID_SIZE_MAX 20 struct vm_area_struct; -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, - __u32 *size); +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); +int build_id_parse_nofault(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) -- cgit v1.2.3 From d4dd9775ec242425576af93daadb80a34083a53c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 29 Aug 2024 10:42:31 -0700 Subject: bpf: wire up sleepable bpf_get_stack() and bpf_get_task_stack() helpers Add sleepable implementations of bpf_get_stack() and bpf_get_task_stack() helpers and allow them to be used from sleepable BPF programs (e.g., sleepable uprobes). Note, capturing the stack trace IPs itself is not sleepable (that would need to be a separate project), only build ID fetching is sleepable and thus more reliable, as it will wait for data to be paged in, if necessary. For that we make use of the sleepable build_id_parse() implementation. Now that build ID related internals in kernel/bpf/stackmap.c can be used both in sleepable and non-sleepable contexts, we need to add additional rcu_read_lock()/rcu_read_unlock() protection around fetching perf_callchain_entry, but with the refactoring in the previous commit it's now pretty straightforward. We make sure to do rcu_read_unlock() (in sleepable mode only) right before the stack_map_get_build_id_offset() call which can sleep. By that time we don't have any more use of perf_callchain_entry. Note, bpf_get_task_stack() will fail for user mode if task != current. And for kernel mode, build IDs are irrelevant. So in that sense adding a sleepable bpf_get_task_stack() implementation is a no-op. It feels right to wire this up for symmetry and completeness, but I'm open to just dropping it until we support the `user && crosstask` condition.
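For illustration, a sleepable uprobe can now ask for build IDs and expect them to be resolved reliably. This is a hypothetical libbpf-style program sketch (the attach target, array size and includes are assumptions, not part of this patch):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("uprobe.s//usr/bin/app:main")	/* ".s" marks the program sleepable */
	int handle(struct pt_regs *ctx)
	{
		struct bpf_stack_build_id ids[20];

		/* build ID resolution may now fault file pages in instead of failing */
		bpf_get_stack(ctx, ids, sizeof(ids),
			      BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
		return 0;
	}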
Reviewed-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240829174232.3133883-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6f87fb014fba..7f3e8477d5e7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3200,7 +3200,9 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; +extern const struct bpf_func_proto bpf_get_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_task_stack_proto; +extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_stackid_proto_pe; extern const struct bpf_func_proto bpf_get_stack_proto_pe; extern const struct bpf_func_proto bpf_sock_map_update_proto; -- cgit v1.2.3 From 6982826fe5e53ef115836de7dd397bd970030937 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 9 Sep 2024 22:09:22 +0200 Subject: mptcp: fallback to TCP after SYN+MPC drops Some middleboxes might be nasty with MPTCP, and decide to drop packets with MPTCP options, instead of just dropping the MPTCP options (or letting them pass...). In this case, it sounds better to fallback to "plain" TCP after 2 retransmissions, and try again. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/477 Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240909-net-next-mptcp-fallback-x-mpc-v1-2-da7ebb4cd2a3@kernel.org Signed-off-by: Jakub Kicinski --- include/net/mptcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 0bc4ab03f487..814b5f2e3ed5 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -223,6 +223,8 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb) return htonl(0u); } + +void mptcp_active_detect_blackhole(struct sock *sk, bool expired); #else static inline void mptcp_init(void) @@ -307,6 +309,8 @@ static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct reques } static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); } + +static inline void mptcp_active_detect_blackhole(struct sock *sk, bool expired) { } #endif /* CONFIG_MPTCP */ #if IS_ENABLED(CONFIG_MPTCP_IPV6) -- cgit v1.2.3 From 6513eb3d3191574b58859ef2d6dc26c0277c6f81 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 10 Sep 2024 17:35:35 -0400 Subject: net: tighten bad gso csum offset check in virtio_net_hdr The referenced commit drops bad input, but has false positives. Tighten the check to avoid these. The check detects illegal checksum offload requests, which produce csum_start/csum_off beyond end of packet after segmentation. But it is based on two incorrect assumptions: 1. virtio_net_hdr_to_skb with VIRTIO_NET_HDR_GSO_TCP[46] implies GSO. True in callers that inject into the tx path, such as tap. But false in callers that inject into rx, like virtio-net. Here, the flags indicate GRO, and CHECKSUM_UNNECESSARY or CHECKSUM_NONE without VIRTIO_NET_HDR_F_NEEDS_CSUM is normal. 2. TSO requires checksum offload, i.e., ip_summed == CHECKSUM_PARTIAL. False, as tcp[46]_gso_segment will fix up csum_start and offset for all other ip_summed by calling __tcp_v4_send_check. 
Because of 2, we can limit the scope of the fix to virtio_net_hdr that do try to set these fields, with a bogus value. Link: https://lore.kernel.org/netdev/20240909094527.GA3048202@port70.net/ Fixes: 89add40066f9 ("net: drop bad gso csum_start and offset in virtio_net_hdr") Signed-off-by: Willem de Bruijn Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20240910213553.839926-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 6c395a2600e8..276ca543ef44 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -173,7 +173,8 @@ retry: break; case SKB_GSO_TCPV4: case SKB_GSO_TCPV6: - if (skb->csum_offset != offsetof(struct tcphdr, check)) + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb->csum_offset != offsetof(struct tcphdr, check)) return -EINVAL; break; } -- cgit v1.2.3 From 7c88f86576f382a5037f7acf2fce796ccafba4db Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:45 +0000 Subject: netdev: add netdev_rx_queue_restart() Add netdev_rx_queue_restart(), which resets an rx queue using the queue API recently merged[1]. The queue API was merged to enable the core net stack to reset individual rx queues to actuate changes in the rx queue's configuration. In later patches in this series, we will use netdev_rx_queue_restart() to reset rx queues after binding or unbinding dmabuf configuration, which will cause reallocation of the page_pool to repopulate its memory using the new configuration. [1] https://lore.kernel.org/netdev/20240430231420.699177-1-shailend@google.com/T/ Signed-off-by: David Wei Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-2-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/netdev_rx_queue.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index aa1716fb0e53..e78ca52d67fb 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -54,4 +54,7 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue) return index; } #endif + +int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); + #endif -- cgit v1.2.3 From 3efd7ab46d0aebc3e567a9846e79a98dbad3291c Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:46 +0000 Subject: net: netdev netlink api to bind dma-buf to a net device API takes the dma-buf fd as input, and binds it to the netdevice. The user can specify the rx queues to bind the dma-buf to. 
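To give a feel for the intended userspace flow, binding a dmabuf fd to a set of rx queues should look roughly like the following YNL cli invocation (the op and attribute names are inferred from the uapi below and the netdev YAML spec, so treat the exact spelling as an approximation):

	$ ./cli.py --spec ../netlink/specs/netdev.yaml \
	        --do bind-rx \
	        --json '{"ifindex": 3, "fd": 8, "queues": [{"type": "rx", "id": 0}]}'

Closing the netlink socket that issued bind-rx is what later unbinds the dmabuf again.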
Suggested-by: Stanislav Fomichev Signed-off-by: Mina Almasry Reviewed-by: Donald Hunter Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-3-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 43742ac5b00d..91bf3ecc5f1d 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -173,6 +173,16 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; +enum { + NETDEV_A_DMABUF_IFINDEX = 1, + NETDEV_A_DMABUF_QUEUES, + NETDEV_A_DMABUF_FD, + NETDEV_A_DMABUF_ID, + + __NETDEV_A_DMABUF_MAX, + NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -186,6 +196,7 @@ enum { NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, + NETDEV_CMD_BIND_RX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From 170aafe35cb98e0f3fbacb446ea86389fbce22ea Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:47 +0000 Subject: netdev: support binding dma-buf to netdevice Add a netdev_dmabuf_binding struct which represents the dma-buf-to-netdevice binding. The netlink API will bind the dma-buf to rx queues on the netdevice. On the binding, the dma_buf_attach & dma_buf_map_attachment will occur. The entries in the sg_table from mapping will be inserted into a genpool to make it ready for allocation. The chunks in the genpool are owned by a dmabuf_chunk_owner struct which holds the dma-buf offset of the base of the chunk and the dma_addr of the chunk. Both are needed to use allocations that come from this chunk. We create a new type that represents an allocation from the genpool: net_iov. We setup the net_iov allocation size in the genpool to PAGE_SIZE for simplicity: to match the PAGE_SIZE normally allocated by the page pool and given to the drivers. The user can unbind the dmabuf from the netdevice by closing the netlink socket that established the binding. We do this so that the binding is automatically unbound even if the userspace process crashes. The binding and unbinding leaves an indicator in struct netdev_rx_queue that the given queue is bound, and the binding is actuated by resetting the rx queue using the queue API. The netdev_dmabuf_binding struct is refcounted, and releases its resources only when all the refs are released. 
Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov # excluding netlink Acked-by: Daniel Vetter Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-4-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ include/net/netdev_rx_queue.h | 2 ++ include/net/netmem.h | 8 ++++++++ include/net/page_pool/types.h | 6 ++++++ 4 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2465bdb6037f..e87b5e488325 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3953,6 +3953,8 @@ u8 dev_xdp_prog_count(struct net_device *dev); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); +u32 dev_get_min_mp_channel_count(const struct net_device *dev); + int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index e78ca52d67fb..ac34f5fb4f71 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -6,6 +6,7 @@ #include #include #include +#include /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { @@ -25,6 +26,7 @@ struct netdev_rx_queue { * Readers and writers must hold RTNL */ struct napi_struct *napi; + struct pp_memory_provider_params mp_params; } ____cacheline_aligned_in_smp; /* diff --git a/include/net/netmem.h b/include/net/netmem.h index 46cc9b89ac79..c23e224dd6a0 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -8,6 +8,14 @@ #ifndef _NET_NETMEM_H #define _NET_NETMEM_H +/* net_iov */ + +struct net_iov { + struct dmabuf_genpool_chunk_owner *owner; +}; + +/* netmem */ + /** * typedef netmem_ref - a nonexistent type marking a reference to generic * network memory. diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 50569fed7868..4afd6dd56351 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -139,6 +139,10 @@ struct page_pool_stats { */ #define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long)) +struct pp_memory_provider_params { + void *mp_priv; +}; + struct page_pool { struct page_pool_params_fast p; @@ -197,6 +201,8 @@ struct page_pool { */ struct ptr_ring ring; + void *mp_priv; + #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ struct page_pool_recycle_stats __percpu *recycle_stats; -- cgit v1.2.3 From 8ab79ed50cf10f338465c296012500de1081646f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:49 +0000 Subject: page_pool: devmem support Convert netmem to be a union of struct page and struct netmem. Overload the LSB of struct netmem* to indicate that it's a net_iov, otherwise it's a page. Currently these entries in struct page are rented by the page_pool and used exclusively by the net stack: struct { unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; atomic_long_t pp_ref_count; }; Mirror these (and only these) entries into struct net_iov and implement netmem helpers that can access these common fields regardless of whether the underlying type is page or net_iov. 
Implement checks for net_iov in netmem helpers which delegate to mm APIs, to ensure net_iov are never passed to the mm stack. Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Acked-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-6-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 124 ++++++++++++++++++++++++++++++++++++--- include/net/page_pool/helpers.h | 39 +++--------- include/trace/events/page_pool.h | 12 ++-- 3 files changed, 130 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/net/netmem.h b/include/net/netmem.h index c23e224dd6a0..8a6e20be4b9d 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -8,12 +8,52 @@ #ifndef _NET_NETMEM_H #define _NET_NETMEM_H +#include +#include + /* net_iov */ +DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers); + +/* We overload the LSB of the struct page pointer to indicate whether it's + * a page or net_iov. + */ +#define NET_IOV 0x01UL + struct net_iov { + unsigned long __unused_padding; + unsigned long pp_magic; + struct page_pool *pp; struct dmabuf_genpool_chunk_owner *owner; + unsigned long dma_addr; + atomic_long_t pp_ref_count; }; +/* These fields in struct page are used by the page_pool and net stack: + * + * struct { + * unsigned long pp_magic; + * struct page_pool *pp; + * unsigned long _pp_mapping_pad; + * unsigned long dma_addr; + * atomic_long_t pp_ref_count; + * }; + * + * We mirror the page_pool fields here so the page_pool can access these fields + * without worrying whether the underlying fields belong to a page or net_iov. + * + * The non-net stack fields of struct page are private to the mm stack and must + * never be mirrored to net_iov. + */ +#define NET_IOV_ASSERT_OFFSET(pg, iov) \ + static_assert(offsetof(struct page, pg) == \ + offsetof(struct net_iov, iov)) +NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic); +NET_IOV_ASSERT_OFFSET(pp, pp); +NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); +NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); +#undef NET_IOV_ASSERT_OFFSET + /* netmem */ /** @@ -27,20 +67,37 @@ struct net_iov { */ typedef unsigned long __bitwise netmem_ref; +static inline bool netmem_is_net_iov(const netmem_ref netmem) +{ + return (__force unsigned long)netmem & NET_IOV; +} + /* This conversion fails (returns NULL) if the netmem_ref is not struct page * backed. - * - * Currently struct page is the only possible netmem, and this helper never - * fails. */ static inline struct page *netmem_to_page(netmem_ref netmem) { + if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) + return NULL; + return (__force struct page *)netmem; } -/* Converting from page to netmem is always safe, because a page can always be - * a netmem. - */ +static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + return (struct net_iov *)((__force unsigned long)netmem & + ~NET_IOV); + + DEBUG_NET_WARN_ON_ONCE(true); + return NULL; +} + +static inline netmem_ref net_iov_to_netmem(struct net_iov *niov) +{ + return (__force netmem_ref)((unsigned long)niov | NET_IOV); +} + static inline netmem_ref page_to_netmem(struct page *page) { return (__force netmem_ref)page; @@ -48,17 +105,70 @@ static inline netmem_ref page_to_netmem(struct page *page) static inline int netmem_ref_count(netmem_ref netmem) { + /* The non-pp refcount of net_iov is always 1. On net_iov, we only + * support pp refcounting which uses the pp_ref_count field. 
+ */ + if (netmem_is_net_iov(netmem)) + return 1; + return page_ref_count(netmem_to_page(netmem)); } -static inline unsigned long netmem_to_pfn(netmem_ref netmem) +static inline unsigned long netmem_pfn_trace(netmem_ref netmem) { + if (netmem_is_net_iov(netmem)) + return 0; + return page_to_pfn(netmem_to_page(netmem)); } +static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) +{ + return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); +} + +static inline struct page_pool *netmem_get_pp(netmem_ref netmem) +{ + return __netmem_clear_lsb(netmem)->pp; +} + +static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem) +{ + return &__netmem_clear_lsb(netmem)->pp_ref_count; +} + +static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid) +{ + /* NUMA node preference only makes sense if we're allocating + * system memory. Memory providers (which give us net_iovs) + * choose for us. + */ + if (netmem_is_net_iov(netmem)) + return true; + + return page_to_nid(netmem_to_page(netmem)) == pref_nid; +} + static inline netmem_ref netmem_compound_head(netmem_ref netmem) { + /* niov are never compounded */ + if (netmem_is_net_iov(netmem)) + return netmem; + return page_to_netmem(compound_head(netmem_to_page(netmem))); } +static inline void *netmem_address(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + return NULL; + + return page_address(netmem_to_page(netmem)); +} + +static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) +{ + return __netmem_clear_lsb(netmem)->dma_addr; +} + #endif /* _NET_NETMEM_H */ diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 2b43a893c619..793e6fd78bc5 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -216,7 +216,7 @@ page_pool_get_dma_dir(const struct page_pool *pool) static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr) { - atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr); + atomic_long_set(netmem_get_pp_ref_count_ref(netmem), nr); } /** @@ -244,7 +244,7 @@ static inline void page_pool_fragment_page(struct page *page, long nr) static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) { - struct page *page = netmem_to_page(netmem); + atomic_long_t *pp_ref_count = netmem_get_pp_ref_count_ref(netmem); long ret; /* If nr == pp_ref_count then we have cleared all remaining @@ -261,19 +261,19 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) * initially, and only overwrite it when the page is partitioned into * more than one piece. */ - if (atomic_long_read(&page->pp_ref_count) == nr) { + if (atomic_long_read(pp_ref_count) == nr) { /* As we have ensured nr is always one for constant case using * the BUILD_BUG_ON(), only need to handle the non-constant case * here for pp_ref_count draining, which is a rare case. */ BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); if (!__builtin_constant_p(nr)) - atomic_long_set(&page->pp_ref_count, 1); + atomic_long_set(pp_ref_count, 1); return 0; } - ret = atomic_long_sub_return(nr, &page->pp_ref_count); + ret = atomic_long_sub_return(nr, pp_ref_count); WARN_ON(ret < 0); /* We are the last user here too, reset pp_ref_count back to 1 to @@ -282,7 +282,7 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) * page_pool_unref_page() currently. 
*/ if (unlikely(!ret)) - atomic_long_set(&page->pp_ref_count, 1); + atomic_long_set(pp_ref_count, 1); return ret; } @@ -401,9 +401,7 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) { - struct page *page = netmem_to_page(netmem); - - dma_addr_t ret = page->dma_addr; + dma_addr_t ret = netmem_get_dma_addr(netmem); if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) ret <<= PAGE_SHIFT; @@ -423,24 +421,6 @@ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page)); } -static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem, - dma_addr_t addr) -{ - struct page *page = netmem_to_page(netmem); - - if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { - page->dma_addr = addr >> PAGE_SHIFT; - - /* We assume page alignment to shave off bottom bits, - * if this "compression" doesn't work we need to drop. - */ - return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT; - } - - page->dma_addr = addr; - return false; -} - /** * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW * @pool: &page_pool the @page belongs to @@ -463,11 +443,6 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, page_pool_get_dma_dir(pool)); } -static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) -{ - return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr); -} - static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index 543e54e432a1..31825ed30032 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -57,12 +57,12 @@ TRACE_EVENT(page_pool_state_release, __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->release = release; - __entry->pfn = netmem_to_pfn(netmem); + __entry->pfn = netmem_pfn_trace(netmem); ), - TP_printk("page_pool=%p netmem=%p pfn=0x%lx release=%u", + TP_printk("page_pool=%p netmem=%p is_net_iov=%lu pfn=0x%lx release=%u", __entry->pool, (void *)__entry->netmem, - __entry->pfn, __entry->release) + __entry->netmem & NET_IOV, __entry->pfn, __entry->release) ); TRACE_EVENT(page_pool_state_hold, @@ -83,12 +83,12 @@ TRACE_EVENT(page_pool_state_hold, __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->hold = hold; - __entry->pfn = netmem_to_pfn(netmem); + __entry->pfn = netmem_pfn_trace(netmem); ), - TP_printk("page_pool=%p netmem=%p pfn=0x%lx hold=%u", + TP_printk("page_pool=%p netmem=%p is_net_iov=%lu, pfn=0x%lx hold=%u", __entry->pool, (void *)__entry->netmem, - __entry->pfn, __entry->hold) + __entry->netmem & NET_IOV, __entry->pfn, __entry->hold) ); TRACE_EVENT(page_pool_update_nid, -- cgit v1.2.3 From 0f921404689398943257793f7240db239a23b609 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:50 +0000 Subject: memory-provider: dmabuf devmem memory provider Implement a memory provider that allocates dmabuf devmem in the form of net_iov. The provider receives a reference to the struct netdev_dmabuf_binding via the pool->mp_priv pointer. The driver needs to set this pointer for the provider in the net_iov. The provider obtains a reference on the netdev_dmabuf_binding which guarantees the binding and the underlying mapping remains alive until the provider is destroyed. 
Usage of PP_FLAG_DMA_MAP is required for this memory provide such that the page_pool can provide the driver with the dma-addrs of the devmem. Support for PP_FLAG_DMA_SYNC_DEV is omitted for simplicity & p.order != 0. Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-7-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 4afd6dd56351..c022c410abe3 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -20,8 +20,18 @@ * device driver responsibility */ #define PP_FLAG_SYSTEM_POOL BIT(2) /* Global system page_pool */ + +/* Allow unreadable (net_iov backed) netmem in this page_pool. Drivers setting + * this must be able to support unreadable netmem, where netmem_address() would + * return NULL. This flag should not be set for header page_pools. + * + * If the driver sets PP_FLAG_ALLOW_UNREADABLE_NETMEM, it should also set + * page_pool_params.slow.queue_idx. + */ +#define PP_FLAG_ALLOW_UNREADABLE_NETMEM BIT(3) + #define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \ - PP_FLAG_SYSTEM_POOL) + PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM) /* * Fast allocation side cache array/stack @@ -57,7 +67,9 @@ struct pp_alloc_cache { * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV * @slow: params with slowpath access only (initialization and Netlink) * @netdev: netdev this pool will serve (leave as NULL if none or multiple) - * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL + * @queue_idx: queue idx this page_pool is being created for. + * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL, + * PP_FLAG_ALLOW_UNREADABLE_NETMEM. */ struct page_pool_params { struct_group_tagged(page_pool_params_fast, fast, @@ -72,6 +84,7 @@ struct page_pool_params { ); struct_group_tagged(page_pool_params_slow, slow, struct net_device *netdev; + unsigned int queue_idx; unsigned int flags; /* private: used by test code only */ void (*init_callback)(netmem_ref netmem, void *arg); -- cgit v1.2.3 From 9f6b619edf2e85746f261b42ae8f818a59d126f7 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:51 +0000 Subject: net: support non paged skb frags Make skb_frag_page() fail in the case where the frag is not backed by a page, and fix its relevant callers to handle this case. Signed-off-by: Mina Almasry Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-8-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 42 +++++++++++++++++++++++++++++++++++++++++- include/linux/skbuff_ref.h | 9 ++++----- 2 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5803eb8a157d..1019f14583c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3524,21 +3524,58 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, fragto->offset = fragfrom->offset; } +/* Return: true if the skb_frag contains a net_iov. 
*/ +static inline bool skb_frag_is_net_iov(const skb_frag_t *frag) +{ + return netmem_is_net_iov(frag->netmem); +} + +/** + * skb_frag_net_iov - retrieve the net_iov referred to by fragment + * @frag: the fragment + * + * Return: the &struct net_iov associated with @frag. Returns NULL if this + * frag has no associated net_iov. + */ +static inline struct net_iov *skb_frag_net_iov(const skb_frag_t *frag) +{ + if (!skb_frag_is_net_iov(frag)) + return NULL; + + return netmem_to_net_iov(frag->netmem); +} + /** * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment * - * Returns the &struct page associated with @frag. + * Return: the &struct page associated with @frag. Returns NULL if this frag + * has no associated page. */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { + if (skb_frag_is_net_iov(frag)) + return NULL; + return netmem_to_page(frag->netmem); } +/** + * skb_frag_netmem - retrieve the netmem referred to by a fragment + * @frag: the fragment + * + * Return: the &netmem_ref associated with @frag. + */ +static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) +{ + return frag->netmem; +} + int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog); + /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer @@ -3548,6 +3585,9 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, */ static inline void *skb_frag_address(const skb_frag_t *frag) { + if (!skb_frag_page(frag)) + return NULL; + return page_address(skb_frag_page(frag)) + skb_frag_off(frag); } diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 16c241a23472..0f3c58007488 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -34,14 +34,13 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) bool napi_pp_put_page(netmem_ref netmem); -static inline void -skb_page_unref(struct page *page, bool recycle) +static inline void skb_page_unref(netmem_ref netmem, bool recycle) { #ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page_to_netmem(page))) + if (recycle && napi_pp_put_page(netmem)) return; #endif - put_page(page); + put_page(netmem_to_page(netmem)); } /** @@ -54,7 +53,7 @@ skb_page_unref(struct page *page, bool recycle) */ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) { - skb_page_unref(skb_frag_page(frag), recycle); + skb_page_unref(skb_frag_netmem(frag), recycle); } /** -- cgit v1.2.3 From 65249feb6b3df9e17bab5911ee56fa7b0971e231 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:52 +0000 Subject: net: add support for skbs with unreadable frags For device memory TCP, we expect the skb headers to be available in host memory for access, and we expect the skb frags to be in device memory and unaccessible to the host. We expect there to be no mixing and matching of device memory frags (unaccessible) with host memory frags (accessible) in the same skb. Add a skb->devmem flag which indicates whether the frags in this skb are device memory frags or not. __skb_fill_netmem_desc() now checks frags added to skbs for net_iov, and marks the skb as skb->devmem accordingly. Add checks through the network stack to avoid accessing the frags of devmem skbs and avoid coalescing devmem skbs with non devmem skbs. 
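A minimal sketch of the pattern core paths are expected to follow (illustrative only, not lifted from the patch): anything that needs the payload in host memory checks the new helper first and backs off for devmem skbs.

	/* devmem frags cannot be linearized or copied into host memory */
	if (!skb_frags_readable(skb))
		return -EFAULT;

	err = skb_linearize(skb);
	if (err)
		return err;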
Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-9-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 19 +++++++++++++++++-- include/net/tcp.h | 3 ++- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1019f14583c9..39f1d16f3628 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -827,6 +827,8 @@ enum skb_tstamp_type { * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) + * @unreadable: indicates that at least 1 of the fragments in this skb is + * unreadable. * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required @@ -1008,7 +1010,7 @@ struct sk_buff { #if IS_ENABLED(CONFIG_IP_SCTP) __u8 csum_not_inet:1; #endif - + __u8 unreadable:1; #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif @@ -1824,6 +1826,12 @@ static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb) __skb_zcopy_downgrade_managed(skb); } +/* Return true if frags in this skb are readable by the host. */ +static inline bool skb_frags_readable(const struct sk_buff *skb) +{ + return !skb->unreadable; +} + static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; @@ -2540,10 +2548,17 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size) { - struct page *page = netmem_to_page(netmem); + struct page *page; __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); + if (netmem_is_net_iov(netmem)) { + skb->unreadable = true; + return; + } + + page = netmem_to_page(netmem); + /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). diff --git a/include/net/tcp.h b/include/net/tcp.h index 2aac11e7e1cc..f77f812bfbe7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1069,7 +1069,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, /* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */ return likely(tcp_skb_can_collapse_to(to) && mptcp_skb_can_collapse(to, from) && - skb_pure_zcopy_same(to, from)); + skb_pure_zcopy_same(to, from) && + skb_frags_readable(to) == skb_frags_readable(from)); } static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to, -- cgit v1.2.3 From 8f0b3cc9a4c102c24808c87f1bc943659d7a7f9f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:53 +0000 Subject: tcp: RX path for devmem TCP In tcp_recvmsg_locked(), detect if the skb being received by the user is a devmem skb. In this case - if the user provided the MSG_SOCK_DEVMEM flag - pass it to tcp_recvmsg_devmem() for custom handling. tcp_recvmsg_devmem() copies any data in the skb header to the linear buffer, and returns a cmsg to the user indicating the number of bytes returned in the linear buffer. tcp_recvmsg_devmem() then loops over the unaccessible devmem skb frags, and returns to the user a cmsg_devmem indicating the location of the data in the dmabuf device memory. 
cmsg_devmem contains this information: 1. the offset into the dmabuf where the payload starts. 'frag_offset'. 2. the size of the frag. 'frag_size'. 3. an opaque token 'frag_token' to return to the kernel when the buffer is to be released. The pages awaiting freeing are stored in the newly added sk->sk_user_frags, and each page passed to userspace is get_page()'d. This reference is dropped once the userspace indicates that it is done reading this page. All pages are released when the socket is destroyed. Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240910171458.219195-10-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 1 + include/net/sock.h | 2 ++ include/uapi/asm-generic/socket.h | 5 +++++ include/uapi/linux/uio.h | 13 +++++++++++++ 4 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index df9cdb8bbfb8..d18cc47e89bd 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -327,6 +327,7 @@ struct ucred { * plain text and require encryption */ +#define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/include/net/sock.h b/include/net/sock.h index f51d61fab059..c58ca8dd561b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -337,6 +337,7 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference + * @sk_user_frags: xarray of pages the user is holding a reference on. */ struct sock { /* @@ -542,6 +543,7 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; + struct xarray sk_user_frags; }; struct sock_bh_locked { diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8ce8a39a1e5f..e993edc9c0ee 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -135,6 +135,11 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SO_DEVMEM_LINEAR 78 +#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR +#define SO_DEVMEM_DMABUF 79 +#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/uio.h b/include/uapi/linux/uio.h index 059b1a9147f4..3a22ddae376a 100644 --- a/include/uapi/linux/uio.h +++ b/include/uapi/linux/uio.h @@ -20,6 +20,19 @@ struct iovec __kernel_size_t iov_len; /* Must be size_t (1003.1g) */ }; +struct dmabuf_cmsg { + __u64 frag_offset; /* offset into the dmabuf where the frag starts. + */ + __u32 frag_size; /* size of the frag. */ + __u32 frag_token; /* token representing this frag for + * DEVMEM_DONTNEED. + */ + __u32 dmabuf_id; /* dmabuf id this frag belongs to. */ + __u32 flags; /* Currently unused. Reserved for future + * uses. 
+ */ +}; + /* * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) */ -- cgit v1.2.3 From 678f6e28b5f6fc2316f2c0fed8f8903101f1e128 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:54 +0000 Subject: net: add SO_DEVMEM_DONTNEED setsockopt to release RX frags Add an interface for the user to notify the kernel that it is done reading the devmem dmabuf frags returned as cmsg. The kernel will drop the reference on the frags to make them available for reuse. Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240910171458.219195-11-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/uapi/asm-generic/socket.h | 1 + include/uapi/linux/uio.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index e993edc9c0ee..3b4e3e815602 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -139,6 +139,7 @@ #define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR #define SO_DEVMEM_DMABUF 79 #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF +#define SO_DEVMEM_DONTNEED 80 #if !defined(__KERNEL__) diff --git a/include/uapi/linux/uio.h b/include/uapi/linux/uio.h index 3a22ddae376a..649739e0c404 100644 --- a/include/uapi/linux/uio.h +++ b/include/uapi/linux/uio.h @@ -33,6 +33,11 @@ struct dmabuf_cmsg { */ }; +struct dmabuf_token { + __u32 token_start; + __u32 token_count; +}; + /* * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) */ -- cgit v1.2.3 From d0caf9876a1c9f844307effb598ad1312d9e0025 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:57 +0000 Subject: netdev: add dmabuf introspection Add dmabuf information to page_pool stats: $ ./cli.py --spec ../netlink/specs/netdev.yaml --dump page-pool-get ... {'dmabuf': 10, 'id': 456, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 455, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 454, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 453, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 452, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 451, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 450, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 449, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, And queue stats: $ ./cli.py --spec ../netlink/specs/netdev.yaml --dump queue-get ... 
{'dmabuf': 10, 'id': 8, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 9, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 10, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 11, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 12, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 13, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 14, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 15, 'ifindex': 3, 'type': 'rx'}, Suggested-by: Jakub Kicinski Signed-off-by: Mina Almasry Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-14-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 91bf3ecc5f1d..7c308f04e7a0 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -93,6 +93,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, + NETDEV_A_PAGE_POOL_DMABUF, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -131,6 +132,7 @@ enum { NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, + NETDEV_A_QUEUE_DMABUF, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) -- cgit v1.2.3 From aa58bec064ab16224645776ce8e0af2fee46136a Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:02 +0530 Subject: net: ethernet: oa_tc6: implement register write operation Implement register write operation according to the control communication specified in the OPEN Alliance 10BASE-T1x MACPHY Serial Interface document. Control write commands are used by the SPI host to write registers within the MAC-PHY. Each control write commands are composed of a 32 bits control command header followed by register write data. The MAC-PHY ignores the final 32 bits of data from the SPI host at the end of the control write command. The write command and data is also echoed from the MAC-PHY back to the SPI host to enable the SPI host to identify which register write failed in the case of any bus errors. Control write commands can write either a single register or multiple consecutive registers. When multiple consecutive registers are written, the address is automatically post-incremented by the MAC-PHY. Writing to any unimplemented or undefined registers shall be ignored and yield no effect. 
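A short usage sketch from a hypothetical MAC driver probe path (the register address and value are placeholders, and oa_tc6_init() is assumed to return NULL on failure):

	struct oa_tc6 *tc6;
	int ret;

	tc6 = oa_tc6_init(spi);
	if (!tc6)
		return -ENODEV;

	/* placeholder address/value; real drivers use the OPEN Alliance register map */
	ret = oa_tc6_write_register(tc6, 0x0004, 0x1);
	if (ret)
		return ret;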
Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-3-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/oa_tc6.h (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h new file mode 100644 index 000000000000..99c490f1c8a8 --- /dev/null +++ b/include/linux/oa_tc6.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * OPEN Alliance 10BASE‑T1x MAC‑PHY Serial Interface framework + * + * Link: https://opensig.org/download/document/OPEN_Alliance_10BASET1x_MAC-PHY_Serial_Interface_V1.1.pdf + * + * Author: Parthiban Veerasooran + */ + +#include + +struct oa_tc6; + +struct oa_tc6 *oa_tc6_init(struct spi_device *spi); +int oa_tc6_write_register(struct oa_tc6 *tc6, u32 address, u32 value); +int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], + u8 length); -- cgit v1.2.3 From 375d1e0278cca70ce801d52c6b95c7a3c00f249c Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:03 +0530 Subject: net: ethernet: oa_tc6: implement register read operation Implement register read operation according to the control communication specified in the OPEN Alliance 10BASE-T1x MACPHY Serial Interface document. Control read commands are used by the SPI host to read registers within the MAC-PHY. Each control read commands are composed of a 32 bits control command header. The MAC-PHY ignores all data from the SPI host following the control header for the remainder of the control read command. Control read commands can read either a single register or multiple consecutive registers. When multiple consecutive registers are read, the address is automatically post-incremented by the MAC-PHY. Reading any unimplemented or undefined registers shall return zero. Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-4-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h index 99c490f1c8a8..85aeecf87306 100644 --- a/include/linux/oa_tc6.h +++ b/include/linux/oa_tc6.h @@ -15,3 +15,6 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi); int oa_tc6_write_register(struct oa_tc6 *tc6, u32 address, u32 value); int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], u8 length); +int oa_tc6_read_register(struct oa_tc6 *tc6, u32 address, u32 *value); +int oa_tc6_read_registers(struct oa_tc6 *tc6, u32 address, u32 value[], + u8 length); -- cgit v1.2.3 From 8f9bf857e43b3f75a098e3af3a6fec2d03203a1e Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:06 +0530 Subject: net: ethernet: oa_tc6: implement internal PHY initialization Internal PHY is initialized as per the PHY register capability supported by the MAC-PHY. Direct PHY Register Access Capability indicates if PHY registers are directly accessible within the SPI register memory space. Indirect PHY Register Access Capability indicates if PHY registers are indirectly accessible through the MDIO/MDC registers MDIOACCn defined in OPEN Alliance specification. Currently the direct register access is only supported. 
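Since the framework now takes over the internal PHY, a hypothetical driver would pass its net_device at init time and tear the framework down on remove, roughly as follows (error handling trimmed, structure names assumed):

	/* probe */
	priv->tc6 = oa_tc6_init(spi, netdev);
	if (!priv->tc6)
		return -ENODEV;

	/* remove */
	oa_tc6_exit(priv->tc6);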
Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-7-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 4 +++- include/uapi/linux/mdio.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h index 85aeecf87306..606ba9f1e663 100644 --- a/include/linux/oa_tc6.h +++ b/include/linux/oa_tc6.h @@ -7,11 +7,13 @@ * Author: Parthiban Veerasooran */ +#include #include struct oa_tc6; -struct oa_tc6 *oa_tc6_init(struct spi_device *spi); +struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev); +void oa_tc6_exit(struct oa_tc6 *tc6); int oa_tc6_write_register(struct oa_tc6 *tc6, u32 address, u32 value); int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], u8 length); diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index c0c8ec995b06..f0d3f268240d 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -23,6 +23,7 @@ #define MDIO_MMD_DTEXS 5 /* DTE Extender Sublayer */ #define MDIO_MMD_TC 6 /* Transmission Convergence */ #define MDIO_MMD_AN 7 /* Auto-Negotiation */ +#define MDIO_MMD_POWER_UNIT 13 /* PHY Power Unit */ #define MDIO_MMD_C22EXT 29 /* Clause 22 extension */ #define MDIO_MMD_VEND1 30 /* Vendor specific 1 */ #define MDIO_MMD_VEND2 31 /* Vendor specific 2 */ -- cgit v1.2.3 From 53fbde8ab21e8c2c6187159cc17fc10cbf20900a Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:09 +0530 Subject: net: ethernet: oa_tc6: implement transmit path to transfer tx ethernet frames The transmit ethernet frame will be converted into multiple transmit data chunks. Each transmit data chunk consists of a 4 bytes header followed by a 64 bytes transmit data chunk payload. The 4 bytes data header occurs at the beginning of each transmit data chunk on MOSI. The data header contains the information needed to determine the validity and location of the transmit frame data within the data chunk payload. The number of transmit data chunks transmitted to mac-phy is limited to the number transmit credits available in the mac-phy. Initially the transmit credits will be updated from the buffer status register and then it will be updated from the footer received on each spi data transfer. The received footer will be examined for the transmit errors if any. 
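For context, a sketch of how a MAC driver's ndo_start_xmit is expected to hand frames to the framework (the private structure and its tc6 member are hypothetical):

	static netdev_tx_t my_macphy_start_xmit(struct sk_buff *skb,
						struct net_device *netdev)
	{
		struct my_macphy_priv *priv = netdev_priv(netdev);

		return oa_tc6_start_xmit(priv->tc6, skb);
	}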
Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-10-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h index 606ba9f1e663..5c7811ac9cbe 100644 --- a/include/linux/oa_tc6.h +++ b/include/linux/oa_tc6.h @@ -20,3 +20,4 @@ int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], int oa_tc6_read_register(struct oa_tc6 *tc6, u32 address, u32 *value); int oa_tc6_read_registers(struct oa_tc6 *tc6, u32 address, u32 value[], u8 length); +netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb); -- cgit v1.2.3 From afd42170c8a6e68edd3f3a7f2aacd2bfbedb58b2 Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:12 +0530 Subject: net: ethernet: oa_tc6: add helper function to enable zero align rx frame Zero align receive frame feature can be enabled to align all receive ethernet frames data to start at the beginning of any receive data chunk payload with a start word offset (SWO) of zero. Receive frames may begin anywhere within the receive data chunk payload when this feature is not enabled. Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-13-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h index 5c7811ac9cbe..15f58e3c56c7 100644 --- a/include/linux/oa_tc6.h +++ b/include/linux/oa_tc6.h @@ -21,3 +21,4 @@ int oa_tc6_read_register(struct oa_tc6 *tc6, u32 address, u32 *value); int oa_tc6_read_registers(struct oa_tc6 *tc6, u32 address, u32 value[], u8 length); netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb); +int oa_tc6_zero_align_receive_frame_enable(struct oa_tc6 *tc6); -- cgit v1.2.3 From 5e9b50dea970ae6d3e1309d4254157099734a2af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:59 +0200 Subject: fs: add f_pipe Only regular files with FMODE_ATOMIC_POS and directories need f_pos_lock. Place a new f_pipe member in a union with f_pos_lock that they can use and make them stop abusing f_version in follow-up patches. 
Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-18-6d3e4816aa7b@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e6b3c1afb31..ca4925008244 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1001,6 +1001,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_cred: stashed credentials of creator/opener * @f_path: path of the file * @f_pos_lock: lock protecting file position + * @f_pipe: specific to pipes * @f_pos: file position * @f_version: file version * @f_security: LSM security context of this file @@ -1026,7 +1027,12 @@ struct file { const struct cred *f_cred; /* --- cacheline 1 boundary (64 bytes) --- */ struct path f_path; - struct mutex f_pos_lock; + union { + /* regular files (with FMODE_ATOMIC_POS) and directories */ + struct mutex f_pos_lock; + /* pipes */ + u64 f_pipe; + }; loff_t f_pos; u64 f_version; /* --- cacheline 2 boundary (128 bytes) --- */ -- cgit v1.2.3 From 11068e0b64cbb540b96e577fcca0926242ecaf58 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:05:01 +0200 Subject: fs: remove f_version Now that detecting concurrent seeks is done by the filesystems that require it we can remove f_version and free up 8 bytes for future extensions. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-20-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ca4925008244..7e11ce172140 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1003,7 +1003,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_pos_lock: lock protecting file position * @f_pipe: specific to pipes * @f_pos: file position - * @f_version: file version * @f_security: LSM security context of this file * @f_owner: file owner * @f_wb_err: writeback error @@ -1034,11 +1033,10 @@ struct file { u64 f_pipe; }; loff_t f_pos; - u64 f_version; - /* --- cacheline 2 boundary (128 bytes) --- */ #ifdef CONFIG_SECURITY void *f_security; #endif + /* --- cacheline 2 boundary (128 bytes) --- */ struct fown_struct *f_owner; errseq_t f_wb_err; errseq_t f_sb_err; -- cgit v1.2.3 From 2077006d4725c82c6e9612cec3a6c140921b067f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 10 Sep 2024 10:16:39 +0200 Subject: uidgid: make sure we fit into one cacheline When I expanded uidgid mappings I intended for a struct uid_gid_map to fit into a single cacheline on x86 as they tend to be pretty performance sensitive (idmapped mounts etc). But a 4 byte hole was added that brought it over 64 bytes. Fix that by adding the static extent array and the extent counter into a substruct. C's type punning for unions guarantees that we can access ->nr_extents even if the last written to member wasn't within the same object. This is also what we rely on in struct_group() and friends. This of course relies on non-strict aliasing which we don't do. 
99) If the member used to read the contents of a union object is not the same as the member last used to store a value in the object, the appropriate part of the object representation of the value is reinterpreted as an object representation in the new type as described in 6.2.6 (a process sometimes called "type punning"). Link: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2310.pdf Link: https://lore.kernel.org/r/20240910-work-uid_gid_map-v1-1-e6bc761363ed@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/user_namespace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 6030a8235617..3625096d5f85 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -21,9 +21,11 @@ struct uid_gid_extent { }; struct uid_gid_map { /* 64 bytes -- 1 cache line */ - u32 nr_extents; union { - struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; + struct { + struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; + u32 nr_extents; + }; struct { struct uid_gid_extent *forward; struct uid_gid_extent *reverse; -- cgit v1.2.3 From db0aa2e9566fda2d23dc8f6c102856ead95578a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Jun 2024 00:20:42 +0100 Subject: mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios Define a data structure, struct folio_queue, to represent a sequence of folios and a kernel-internal I/O iterator type, ITER_FOLIOQ, to allow a list of folio_queue structures to be used to provide a buffer to iov_iter-taking functions, such as sendmsg and recvmsg. The folio_queue structure looks like: struct folio_queue { struct folio_batch vec; u8 orders[PAGEVEC_SIZE]; struct folio_queue *next; struct folio_queue *prev; unsigned long marks; unsigned long marks2; }; It does not use a list_head so that next and/or prev can be set to NULL at the ends of the list, allowing iov_iter-handling routines to determine that they *are* the ends without needing to store a head pointer in the iov_iter struct. A folio_batch struct is used to hold the folio pointers which allows the batch to be passed to batch handling functions. Two mark bits are available per slot. The intention is to use at least one of them to mark folios that need putting, but that might not be ultimately necessary. Accessor functions are used to access the slots to do the masking and an additional accessor function is used to indicate the size of the array. The order of each folio is also stored in the structure to avoid the need for iov_iter_advance() and iov_iter_revert() to have to query each folio to find its size. With careful barriering, this can be used as an extending buffer with new folios inserted and new folio_queue structs added without the need for a lock. Further, provided we always keep at least one struct in the buffer, we can also remove consumed folios and consumed structs from the head end as we without the need for locks. [Questions/thoughts] (1) To manage this, I need a head pointer, a tail pointer, a tail slot number (assuming insertion happens at the tail end and the next pointers point from head to tail). Should I put these into a struct of their own, say "folio_queue_head" or "rolling_buffer"? I will end up with two of these in netfs_io_request eventually, one keeping track of the pagecache I'm dealing with for buffered I/O and the other to hold a bounce buffer when we need one. 
(2) Should I make the slots {folio,off,len} or bio_vec? (3) This is intended to replace ITER_XARRAY eventually. Using an xarray in I/O iteration requires the taking of the RCU read lock, doing copying under the RCU read lock, walking the xarray (which may change under us), handling retries and dealing with special values. The advantage of ITER_XARRAY is that when we're dealing with the pagecache directly, we don't need any allocation - but if we're doing encrypted comms, there's a good chance we'd be using a bounce buffer anyway. This will require afs, erofs, cifs, orangefs and fscache to be converted to not use this. afs still uses it for dirs and symlinks; some of erofs usages should be easy to change, but there's one which won't be so easy; ceph's use via fscache can be fixed by porting ceph to netfslib; cifs is using xarray as a bounce buffer - that can be moved to use sheaves instead; and orangefs has a similar problem to erofs - maybe orangefs could use netfslib? Signed-off-by: David Howells cc: Matthew Wilcox cc: Jeff Layton cc: Steve French cc: Ilya Dryomov cc: Gao Xiang cc: Mike Marshall cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-erofs@lists.ozlabs.org cc: devel@lists.orangefs.org Link: https://lore.kernel.org/r/20240814203850.2240469-13-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 138 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/iov_iter.h | 57 ++++++++++++++++++ include/linux/uio.h | 12 ++++ 3 files changed, 207 insertions(+) create mode 100644 include/linux/folio_queue.h (limited to 'include') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h new file mode 100644 index 000000000000..52773613bf23 --- /dev/null +++ b/include/linux/folio_queue.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Queue of folios definitions + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_FOLIO_QUEUE_H +#define _LINUX_FOLIO_QUEUE_H + +#include + +/* + * Segment in a queue of running buffers. Each segment can hold a number of + * folios and a portion of the queue can be referenced with the ITER_FOLIOQ + * iterator. The possibility exists of inserting non-folio elements into the + * queue (such as gaps). + * + * Explicit prev and next pointers are used instead of a list_head to make it + * easier to add segments to tail and remove them from the head without the + * need for a lock. 
+ */ +struct folio_queue { + struct folio_batch vec; /* Folios in the queue segment */ + u8 orders[PAGEVEC_SIZE]; /* Order of each folio */ + struct folio_queue *next; /* Next queue segment or NULL */ + struct folio_queue *prev; /* Previous queue segment of NULL */ + unsigned long marks; /* 1-bit mark per folio */ + unsigned long marks2; /* Second 1-bit mark per folio */ +#if PAGEVEC_SIZE > BITS_PER_LONG +#error marks is not big enough +#endif +}; + +static inline void folioq_init(struct folio_queue *folioq) +{ + folio_batch_init(&folioq->vec); + folioq->next = NULL; + folioq->prev = NULL; + folioq->marks = 0; + folioq->marks2 = 0; +} + +static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) +{ + return PAGEVEC_SIZE; +} + +static inline unsigned int folioq_count(struct folio_queue *folioq) +{ + return folio_batch_count(&folioq->vec); +} + +static inline bool folioq_full(struct folio_queue *folioq) +{ + //return !folio_batch_space(&folioq->vec); + return folioq_count(folioq) >= folioq_nr_slots(folioq); +} + +static inline bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks); +} + +static inline void folioq_mark(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks); +} + +static inline void folioq_unmark(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks); +} + +static inline bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks2); +} + +static inline void folioq_mark2(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks2); +} + +static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks2); +} + +static inline unsigned int __folio_order(struct folio *folio) +{ + if (!folio_test_large(folio)) + return 0; + return folio->_flags_1 & 0xff; +} + +static inline unsigned int folioq_append(struct folio_queue *folioq, struct folio *folio) +{ + unsigned int slot = folioq->vec.nr++; + + folioq->vec.folios[slot] = folio; + folioq->orders[slot] = __folio_order(folio); + return slot; +} + +static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct folio *folio) +{ + unsigned int slot = folioq->vec.nr++; + + folioq->vec.folios[slot] = folio; + folioq->orders[slot] = __folio_order(folio); + folioq_mark(folioq, slot); + return slot; +} + +static inline struct folio *folioq_folio(const struct folio_queue *folioq, unsigned int slot) +{ + return folioq->vec.folios[slot]; +} + +static inline unsigned int folioq_folio_order(const struct folio_queue *folioq, unsigned int slot) +{ + return folioq->orders[slot]; +} + +static inline size_t folioq_folio_size(const struct folio_queue *folioq, unsigned int slot) +{ + return PAGE_SIZE << folioq_folio_order(folioq, slot); +} + +static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) +{ + folioq->vec.folios[slot] = NULL; + folioq_unmark(folioq, slot); + folioq_unmark2(folioq, slot); +} + +#endif /* _LINUX_FOLIO_QUEUE_H */ diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index 270454a6703d..a223370a59a7 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -10,6 +10,7 @@ #include #include +#include typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len, void *priv, void *priv2); @@ -140,6 +141,60 @@ size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, return 
progress; } +/* + * Handle ITER_FOLIOQ. + */ +static __always_inline +size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct folio_queue *folioq = iter->folioq; + unsigned int slot = iter->folioq_slot; + size_t progress = 0, skip = iter->iov_offset; + + if (slot == folioq_nr_slots(folioq)) { + /* The iterator may have been extended. */ + folioq = folioq->next; + slot = 0; + } + + do { + struct folio *folio = folioq_folio(folioq, slot); + size_t part, remain, consumed; + size_t fsize; + void *base; + + if (!folio) + break; + + fsize = folioq_folio_size(folioq, slot); + base = kmap_local_folio(folio, skip); + part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + if (skip >= fsize) { + skip = 0; + slot++; + if (slot == folioq_nr_slots(folioq) && folioq->next) { + folioq = folioq->next; + slot = 0; + } + } + if (remain) + break; + } while (len); + + iter->folioq_slot = slot; + iter->folioq = folioq; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + /* * Handle ITER_XARRAY. */ @@ -249,6 +304,8 @@ size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, return iterate_bvec(iter, len, priv, priv2, step); if (iov_iter_is_kvec(iter)) return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_folioq(iter)) + return iterate_folioq(iter, len, priv, priv2, step); if (iov_iter_is_xarray(iter)) return iterate_xarray(iter, len, priv, priv2, step); return iterate_discard(iter, len, priv, priv2, step); diff --git a/include/linux/uio.h b/include/linux/uio.h index 7020adedfa08..845d110acadc 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -11,6 +11,7 @@ #include struct page; +struct folio_queue; typedef unsigned int __bitwise iov_iter_extraction_t; @@ -25,6 +26,7 @@ enum iter_type { ITER_IOVEC, ITER_BVEC, ITER_KVEC, + ITER_FOLIOQ, ITER_XARRAY, ITER_DISCARD, }; @@ -66,6 +68,7 @@ struct iov_iter { const struct iovec *__iov; const struct kvec *kvec; const struct bio_vec *bvec; + const struct folio_queue *folioq; struct xarray *xarray; void __user *ubuf; }; @@ -74,6 +77,7 @@ struct iov_iter { }; union { unsigned long nr_segs; + u8 folioq_slot; loff_t xarray_start; }; }; @@ -126,6 +130,11 @@ static inline bool iov_iter_is_discard(const struct iov_iter *i) return iov_iter_type(i) == ITER_DISCARD; } +static inline bool iov_iter_is_folioq(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_FOLIOQ; +} + static inline bool iov_iter_is_xarray(const struct iov_iter *i) { return iov_iter_type(i) == ITER_XARRAY; @@ -273,6 +282,9 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); +void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction, + const struct folio_queue *folioq, + unsigned int first_slot, unsigned int offset, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, -- cgit v1.2.3 From 197a3de607d92b3d72e69edf5470e0a8fae548cc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 14 Aug 2024 16:14:21 +0100 Subject: 
iov_iter: Provide copy_folio_from_iter() Provide a copy_folio_from_iter() wrapper. Signed-off-by: David Howells cc: Alexander Viro cc: Christian Brauner cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20240814203850.2240469-14-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/uio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 845d110acadc..853f9de5aa05 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -189,6 +189,12 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, return copy_page_to_iter(&folio->page, offset, bytes, i); } +static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset, + size_t bytes, struct iov_iter *i) +{ + return copy_page_from_iter(&folio->page, offset, bytes, i); +} + static inline size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i) { -- cgit v1.2.3 From cd0277ed0c188dd40e7744e89299af7b78831ca4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 May 2024 21:47:07 +0100 Subject: netfs: Use new folio_queue data type and iterator instead of xarray iter Make the netfs write-side routines use the new folio_queue struct to hold a rolling buffer of folios, with the issuer adding folios at the tail and the collector removing them from the head as they're processed instead of using an xarray. This will allow a subsequent patch to simplify the write collector. The primary mark (as tested by folioq_is_marked()) is used to note if the corresponding folio needs putting. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-16-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 8 ++++---- include/trace/events/netfs.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 63ab2c775a21..f7b444f4f251 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -38,10 +38,6 @@ static inline void folio_start_private_2(struct folio *folio) folio_set_private_2(folio); } -/* Marks used on xarray-based buffers */ -#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ -#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ - enum netfs_io_source { NETFS_SOURCE_UNKNOWN, NETFS_FILL_WITH_ZEROES, @@ -233,6 +229,8 @@ struct netfs_io_request { struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams struct netfs_group *group; /* Writeback group being written back */ + struct folio_queue *buffer; /* Head of I/O buffer */ + struct folio_queue *buffer_tail; /* Tail of I/O buffer */ struct iov_iter iter; /* Unencrypted-side iterator */ struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ void *netfs_priv; /* Private data for the netfs */ @@ -254,6 +252,8 @@ struct netfs_io_request { short error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ + u8 buffer_head_slot; /* First slot in ->buffer */ + u8 buffer_tail_slot; /* Next slot in ->buffer_tail */ unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ 
atomic64_t issued_to; /* Write issuer folio cursor */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 47cd11aaccac..4e13774a06e6 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -153,6 +153,7 @@ EM(netfs_folio_trace_mkwrite, "mkwrite") \ EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ EM(netfs_folio_trace_not_under_wback, "!wback") \ + EM(netfs_folio_trace_put, "put") \ EM(netfs_folio_trace_read_gaps, "read-gaps") \ EM(netfs_folio_trace_redirtied, "redirtied") \ EM(netfs_folio_trace_store, "store") \ -- cgit v1.2.3 From 983cdcf8fe141b0ce16bc71959a5dc55bcb0764d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Jun 2024 07:48:55 +0100 Subject: netfs: Simplify the writeback code Use the new folio_queue structures to simplify the writeback code. The problem with referring to the i_pages xarray directly is that we may have gaps in the sequence of folios we're writing from that we need to skip when we're removing the writeback mark from the folios we're writing back from. At the moment the code tries to deal with this by carefully tracking the gaps in each writeback stream (eg. write to server and write to cache) and divining when there's a gap that spans folios (something that's not helped by folios not being a consistent size). Instead, the folio_queue buffer contains pointers only to the folios we're dealing with, has them in ascending order and indicates a gap by placing non-consecutive folios next to each other. This makes it possible to track where we need to clean up to by just keeping track of where we've processed to on each stream and taking the minimum. Note that the I/O iterator is always rounded up to the end of the folio, even if that is beyond the EOF position, so that the cache can do DIO from the page. The excess space is cleared, though mmapped writes clobber it.
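The gap convention described here can be consumed with nothing more than the folioq_*() accessors added earlier in this series. The sketch below is illustrative only (the function name is made up and the real netfs collector does considerably more); it assumes the <linux/folio_queue.h> from the earlier patch plus the usual mm helpers, and simply reports wherever the folio index sequence is discontiguous:

#include <linux/folio_queue.h>
#include <linux/mm.h>
#include <linux/printk.h>

/* Sketch: walk a folio_queue chain and report index gaps (not netfs code). */
static void report_buffer_gaps(struct folio_queue *fq)
{
	pgoff_t expected = 0;
	bool first = true;

	for (; fq; fq = fq->next) {
		unsigned int slot;

		for (slot = 0; slot < folioq_count(fq); slot++) {
			struct folio *folio = folioq_folio(fq, slot);

			if (!folio)
				continue;
			if (!first && folio->index != expected)
				pr_debug("gap before folio index %lx\n", folio->index);
			expected = folio->index + folio_nr_pages(folio);
			first = false;
		}
	}
}

With the folios guaranteed to be in ascending order, the collector only has to remember how far each stream has processed and take the minimum, as described above.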
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-18-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - include/trace/events/netfs.h | 33 ++------------------------------- 2 files changed, 2 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index f7b444f4f251..bd0e3d147822 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -257,7 +257,6 @@ struct netfs_io_request { unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ atomic64_t issued_to; /* Write issuer folio cursor */ - unsigned long long contiguity; /* Tracking for gaps in the writeback sequence */ unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 4e13774a06e6..58bf23002fc1 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -512,33 +512,6 @@ TRACE_EVENT(netfs_collect, __entry->start + __entry->len) ); -TRACE_EVENT(netfs_collect_contig, - TP_PROTO(const struct netfs_io_request *wreq, unsigned long long to, - enum netfs_collect_contig_trace type), - - TP_ARGS(wreq, to, type), - - TP_STRUCT__entry( - __field(unsigned int, wreq) - __field(enum netfs_collect_contig_trace, type) - __field(unsigned long long, contiguity) - __field(unsigned long long, to) - ), - - TP_fast_assign( - __entry->wreq = wreq->debug_id; - __entry->type = type; - __entry->contiguity = wreq->contiguity; - __entry->to = to; - ), - - TP_printk("R=%08x %llx -> %llx %s", - __entry->wreq, - __entry->contiguity, - __entry->to, - __print_symbolic(__entry->type, netfs_collect_contig_traces)) - ); - TRACE_EVENT(netfs_collect_sreq, TP_PROTO(const struct netfs_io_request *wreq, const struct netfs_io_subrequest *subreq), @@ -610,7 +583,6 @@ TRACE_EVENT(netfs_collect_state, __field(unsigned int, notes ) __field(unsigned long long, collected_to ) __field(unsigned long long, cleaned_to ) - __field(unsigned long long, contiguity ) ), TP_fast_assign( @@ -618,12 +590,11 @@ TRACE_EVENT(netfs_collect_state, __entry->notes = notes; __entry->collected_to = collected_to; __entry->cleaned_to = wreq->cleaned_to; - __entry->contiguity = wreq->contiguity; ), - TP_printk("R=%08x cto=%llx fto=%llx ctg=%llx n=%x", + TP_printk("R=%08x col=%llx cln=%llx n=%x", __entry->wreq, __entry->collected_to, - __entry->cleaned_to, __entry->contiguity, + __entry->cleaned_to, __entry->notes) ); -- cgit v1.2.3 From ee4cdf7ba857a894ad1650d6ab77669cbbfa329e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jul 2024 00:40:22 +0100 Subject: netfs: Speed up buffered reading Improve the efficiency of buffered reads in a number of ways: (1) Overhaul the algorithm in general so that it's a lot more compact and split the read submission code between buffered and unbuffered versions. The unbuffered version can be vastly simplified. (2) Read-result collection is handed off to a work queue rather than being done in the I/O thread. Multiple subrequests can be processes simultaneously. (3) When a subrequest is collected, any folios it fully spans are collected and "spare" data on either side is donated to either the previous or the next subrequest in the sequence. 
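A very rough sketch of the donation bookkeeping in point (3), using the prev_donated/next_donated fields this patch adds to struct netfs_io_subrequest in the hunk below; the helper name is invented and the real collector also handles locking, ordering and the request-level fallback:

#include <linux/netfs.h>

/*
 * Sketch only: hand the unaligned residue of a collected subrequest to a
 * neighbour so that only whole folios get unlocked here.
 */
static void donate_residue(struct netfs_io_subrequest *prev,
			   struct netfs_io_subrequest *next,
			   size_t residue)
{
	if (!residue)
		return;
	if (next)
		next->prev_donated += residue;	/* treated as data next already has */
	else if (prev)
		prev->next_donated += residue;	/* no successor: donate backwards */
}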
Notes: (*) Readahead expansion is massively slows down fio, presumably because it causes a load of extra allocations, both folio and xarray, up front before RPC requests can be transmitted. (*) RDMA with cifs does appear to work, both with SIW and RXE. (*) PG_private_2-based reading and copy-to-cache is split out into its own file and altered to use folio_queue. Note that the copy to the cache now creates a new write transaction against the cache and adds the folios to be copied into it. This allows it to use part of the writeback I/O code. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 18 ++++++++ include/linux/netfs.h | 26 +++++++---- include/trace/events/netfs.h | 103 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 134 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 52773613bf23..955680c3bb5f 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -27,6 +27,7 @@ struct folio_queue { struct folio_queue *prev; /* Previous queue segment of NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ + unsigned long marks3; /* Third 1-bit mark per folio */ #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif @@ -39,6 +40,7 @@ static inline void folioq_init(struct folio_queue *folioq) folioq->prev = NULL; folioq->marks = 0; folioq->marks2 = 0; + folioq->marks3 = 0; } static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) @@ -87,6 +89,21 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) clear_bit(slot, &folioq->marks2); } +static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks3); +} + +static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks3); +} + +static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks3); +} + static inline unsigned int __folio_order(struct folio *folio) { if (!folio_test_large(folio)) @@ -133,6 +150,7 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) folioq->vec.folios[slot] = NULL; folioq_unmark(folioq, slot); folioq_unmark2(folioq, slot); + folioq_unmark3(folioq, slot); } #endif /* _LINUX_FOLIO_QUEUE_H */ diff --git a/include/linux/netfs.h b/include/linux/netfs.h index bd0e3d147822..c0f0c9c87d86 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -178,36 +178,43 @@ struct netfs_io_subrequest { unsigned long long start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ + size_t consumed; /* Amount of read data consumed */ + size_t prev_donated; /* Amount of data donated from previous subreq */ + size_t next_donated; /* Amount of data donated from next subreq */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ unsigned int nr_segs; /* Number of segs in io_iter */ enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* I/O stream this belongs to */ + unsigned char curr_folioq_slot; /* Folio currently being read */ 
+ unsigned char curr_folio_order; /* Order of folio */ + struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ -#define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */ #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */ +#define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */ #define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */ #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ -#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */ }; enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ + NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ NETFS_DIO_WRITE, /* This is a direct I/O write */ + NETFS_PGPRIV2_COPY_TO_CACHE, /* [DEPRECATED] This is writing read data to the cache */ nr__netfs_io_origin } __mode(byte); @@ -224,6 +231,7 @@ struct netfs_io_request { struct address_space *mapping; /* The mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; + struct readahead_control *ractl; /* Readahead descriptor */ struct list_head proc_link; /* Link in netfs_iorequests */ struct list_head subrequests; /* Contributory I/O operations */ struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ @@ -244,12 +252,10 @@ struct netfs_io_request { unsigned int nr_group_rel; /* Number of refs to release on ->group */ spinlock_t lock; /* Lock for queuing subreqs */ atomic_t nr_outstanding; /* Number of ops in progress */ - atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ - size_t upper_len; /* Length can be extended to here */ unsigned long long submitted; /* Amount submitted for I/O so far */ unsigned long long len; /* Length of the request */ size_t transferred; /* Amount to be indicated as transferred */ - short error; /* 0 or error that occurred */ + long error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ u8 buffer_head_slot; /* First slot in ->buffer */ @@ -260,9 +266,9 @@ struct netfs_io_request { unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ + size_t prev_donated; /* Fallback for subreq->prev_donated */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ #define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache 
*/ #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ @@ -274,6 +280,7 @@ struct netfs_io_request { #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ +#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; @@ -292,7 +299,7 @@ struct netfs_request_ops { /* Read request handling */ void (*expand_readahead)(struct netfs_io_request *rreq); - bool (*clamp_length)(struct netfs_io_subrequest *subreq); + int (*prepare_read)(struct netfs_io_subrequest *subreq); void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, @@ -422,7 +429,10 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp); vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); /* (Sub)request management API. */ -void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); +void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq, + bool was_async); +void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq, + int error, bool was_async); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 58bf23002fc1..7b26463cb98f 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -20,6 +20,7 @@ EM(netfs_read_trace_expanded, "EXPANDED ") \ EM(netfs_read_trace_readahead, "READAHEAD") \ EM(netfs_read_trace_readpage, "READPAGE ") \ + EM(netfs_read_trace_read_gaps, "READ-GAPS") \ EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \ E_(netfs_read_trace_write_begin, "WRITEBEGN") @@ -33,12 +34,14 @@ #define netfs_rreq_origins \ EM(NETFS_READAHEAD, "RA") \ EM(NETFS_READPAGE, "RP") \ + EM(NETFS_READ_GAPS, "RG") \ EM(NETFS_READ_FOR_WRITE, "RW") \ EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITETHROUGH, "WT") \ EM(NETFS_UNBUFFERED_WRITE, "UW") \ - E_(NETFS_DIO_WRITE, "DW") + EM(NETFS_DIO_WRITE, "DW") \ + E_(NETFS_PGPRIV2_COPY_TO_CACHE, "2C") #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ @@ -69,15 +72,25 @@ E_(NETFS_INVALID_WRITE, "INVL") #define netfs_sreq_traces \ + EM(netfs_sreq_trace_add_donations, "+DON ") \ + EM(netfs_sreq_trace_added, "ADD ") \ + EM(netfs_sreq_trace_clear, "CLEAR") \ EM(netfs_sreq_trace_discard, "DSCRD") \ + EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ + EM(netfs_sreq_trace_donate_to_next, "DON-N") \ EM(netfs_sreq_trace_download_instead, "RDOWN") \ EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_free, "FREE ") \ + EM(netfs_sreq_trace_hit_eof, "EOF ") \ + EM(netfs_sreq_trace_io_progress, "IO ") \ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_prepare, "PREP ") \ EM(netfs_sreq_trace_prep_failed, "PRPFL") \ - EM(netfs_sreq_trace_resubmit_short, "SHORT") \ + EM(netfs_sreq_trace_progress, "PRGRS") \ + EM(netfs_sreq_trace_reprep_failed, "REPFL") \ EM(netfs_sreq_trace_retry, "RETRY") \ + EM(netfs_sreq_trace_short, "SHORT") \ + EM(netfs_sreq_trace_split, 
"SPLIT") \ EM(netfs_sreq_trace_submit, "SUBMT") \ EM(netfs_sreq_trace_terminated, "TERM ") \ EM(netfs_sreq_trace_write, "WRITE") \ @@ -118,7 +131,7 @@ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \ EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ - EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \ + EM(netfs_sreq_trace_put_consumed, "PUT CONSUME") \ EM(netfs_sreq_trace_put_done, "PUT DONE ") \ EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \ @@ -138,6 +151,7 @@ EM(netfs_flush_content, "flush") \ EM(netfs_streaming_filled_page, "mod-streamw-f") \ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ + EM(netfs_folio_trace_abandon, "abandon") \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \ @@ -154,7 +168,11 @@ EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ EM(netfs_folio_trace_not_under_wback, "!wback") \ EM(netfs_folio_trace_put, "put") \ + EM(netfs_folio_trace_read, "read") \ + EM(netfs_folio_trace_read_done, "read-done") \ EM(netfs_folio_trace_read_gaps, "read-gaps") \ + EM(netfs_folio_trace_read_put, "read-put") \ + EM(netfs_folio_trace_read_unlock, "read-unlock") \ EM(netfs_folio_trace_redirtied, "redirtied") \ EM(netfs_folio_trace_store, "store") \ EM(netfs_folio_trace_store_copy, "store-copy") \ @@ -167,6 +185,12 @@ EM(netfs_contig_trace_jump, "-->JUMP-->") \ E_(netfs_contig_trace_unlock, "Unlock") +#define netfs_donate_traces \ + EM(netfs_trace_donate_tail_to_prev, "tail-to-prev") \ + EM(netfs_trace_donate_to_prev, "to-prev") \ + EM(netfs_trace_donate_to_next, "to-next") \ + E_(netfs_trace_donate_to_deferred_next, "defer-next") + #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY @@ -184,6 +208,7 @@ enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte); enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); enum netfs_folio_trace { netfs_folio_traces } __mode(byte); enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte); +enum netfs_donate_trace { netfs_donate_traces } __mode(byte); #endif @@ -206,6 +231,7 @@ netfs_rreq_ref_traces; netfs_sreq_ref_traces; netfs_folio_traces; netfs_collect_contig_traces; +netfs_donate_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -226,6 +252,7 @@ TRACE_EVENT(netfs_read, TP_STRUCT__entry( __field(unsigned int, rreq ) __field(unsigned int, cookie ) + __field(loff_t, i_size ) __field(loff_t, start ) __field(size_t, len ) __field(enum netfs_read_trace, what ) @@ -235,18 +262,19 @@ TRACE_EVENT(netfs_read, TP_fast_assign( __entry->rreq = rreq->debug_id; __entry->cookie = rreq->cache_resources.debug_id; + __entry->i_size = rreq->i_size; __entry->start = start; __entry->len = len; __entry->what = what; __entry->netfs_inode = rreq->inode->i_ino; ), - TP_printk("R=%08x %s c=%08x ni=%x s=%llx %zx", + TP_printk("R=%08x %s c=%08x ni=%x s=%llx l=%zx sz=%llx", __entry->rreq, __print_symbolic(__entry->what, netfs_read_traces), __entry->cookie, __entry->netfs_inode, - __entry->start, __entry->len) + __entry->start, __entry->len, __entry->i_size) ); TRACE_EVENT(netfs_rreq, @@ -651,6 +679,71 @@ TRACE_EVENT(netfs_collect_stream, __entry->collected_to, __entry->front) ); +TRACE_EVENT(netfs_progress, + TP_PROTO(const struct netfs_io_subrequest *subreq, + unsigned long long start, size_t avail, size_t part), + + TP_ARGS(subreq, start, avail, part), + + TP_STRUCT__entry( + 
__field(unsigned int, rreq) + __field(unsigned int, subreq) + __field(unsigned int, consumed) + __field(unsigned int, transferred) + __field(unsigned long long, f_start) + __field(unsigned int, f_avail) + __field(unsigned int, f_part) + __field(unsigned char, slot) + ), + + TP_fast_assign( + __entry->rreq = subreq->rreq->debug_id; + __entry->subreq = subreq->debug_index; + __entry->consumed = subreq->consumed; + __entry->transferred = subreq->transferred; + __entry->f_start = start; + __entry->f_avail = avail; + __entry->f_part = part; + __entry->slot = subreq->curr_folioq_slot; + ), + + TP_printk("R=%08x[%02x] s=%llx ct=%x/%x pa=%x/%x sl=%x", + __entry->rreq, __entry->subreq, __entry->f_start, + __entry->consumed, __entry->transferred, + __entry->f_part, __entry->f_avail, __entry->slot) + ); + +TRACE_EVENT(netfs_donate, + TP_PROTO(const struct netfs_io_request *rreq, + const struct netfs_io_subrequest *from, + const struct netfs_io_subrequest *to, + size_t amount, + enum netfs_donate_trace trace), + + TP_ARGS(rreq, from, to, amount, trace), + + TP_STRUCT__entry( + __field(unsigned int, rreq) + __field(unsigned int, from) + __field(unsigned int, to) + __field(unsigned int, amount) + __field(enum netfs_donate_trace, trace) + ), + + TP_fast_assign( + __entry->rreq = rreq->debug_id; + __entry->from = from->debug_index; + __entry->to = to ? to->debug_index : -1; + __entry->amount = amount; + __entry->trace = trace; + ), + + TP_printk("R=%08x[%02x] -> [%02x] %s am=%x", + __entry->rreq, __entry->from, __entry->to, + __print_symbolic(__entry->trace, netfs_donate_traces), + __entry->amount) + ); + #undef EM #undef E_ #endif /* _TRACE_NETFS_H */ -- cgit v1.2.3 From c4f1450ecccc5311db87f806998eda1c824c4e35 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 12 Jul 2024 12:44:30 +0100 Subject: cachefiles, netfs: Fix write to partial block at EOF Because it uses DIO writes, cachefiles is unable to make a write to the backing file if that write is not aligned to and sized according to the backing file's DIO block alignment. This makes it tricky to handle a write to the cache where the EOF on the network file is not correctly aligned. To get around this, netfslib attempts to tell the driver it is calling how much more data there is available beyond the EOF that it can use to pad the write (netfslib preclears the part of the folio above the EOF). However, it tries to tell the cache what the maximum length is, but doesn't calculate this correctly; and, in any case, cachefiles actually ignores the value and just skips the block. Fix this by: (1) Change the value passed to indicate the amount of extra data that can be added to the operation (now ->submit_extendable_to). This is much simpler to calculate as it's just the end of the folio minus the top of the data within the folio - rather than having to account for data spread over multiple folios. (2) Make cachefiles add some of this data if the subrequest it is given ends at the network file's i_size if the extra data is sufficient to pad out to a whole block. 
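In other words, the renamed ->submit_extendable_to in the hunk below is simply "end of the folio minus end of the data within the folio", and the cache can consume it when a write ends exactly at i_size. A hedged sketch of that decision follows; the function and parameter names are placeholders, not the actual cachefiles code:

#include <linux/math.h>

/*
 * Sketch: round a write that ends exactly at i_size up to a whole DIO
 * block, but only if the precleared space past EOF (extendable_to)
 * covers the padding.
 */
static size_t sketch_pad_write_len(unsigned long long start, size_t len,
				   unsigned long long i_size,
				   size_t extendable_to, size_t dio_block_size)
{
	if (start + len == i_size) {
		size_t padded = round_up(len, dio_block_size);

		if (padded - len <= extendable_to)
			return padded;
	}
	return len;
}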
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-22-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c0f0c9c87d86..5eaceef41e6c 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -135,7 +135,7 @@ struct netfs_io_stream { unsigned int sreq_max_segs; /* 0 or max number of segments in an iterator */ unsigned int submit_off; /* Folio offset we're submitting from */ unsigned int submit_len; /* Amount of data left to submit */ - unsigned int submit_max_len; /* Amount I/O can be rounded up to */ + unsigned int submit_extendable_to; /* Amount I/O can be rounded up to */ void (*prepare_write)(struct netfs_io_subrequest *subreq); void (*issue_write)(struct netfs_io_subrequest *subreq); /* Collection tracking */ -- cgit v1.2.3 From 8f246b7c0a1be0882374f2ff831a61f0dbe77678 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jul 2024 12:23:11 +0100 Subject: netfs: Cancel dirty folios that have no storage destination Kafs wants to be able to cache the contents of directories (and symlinks), but whilst these are downloaded from the server with the FS.FetchData RPC op and similar, the same as for regular files, they can't be updated by FS.StoreData, but rather have special operations (FS.MakeDir, etc.). Now, rather than redownloading a directory's content after each change made to that directory, kafs modifies the local blob. This blob can be saved out to the cache, and since it's using netfslib, kafs just marks the folios dirty and lets ->writepages() on the directory take care of it, as for a regular file. This is fine as long as there's a cache: although the upload stream is disabled, there's a cache stream to drive the procedure. But if the cache goes away in the meantime, suddenly there's no way to do any writes and the code gets confused, complains "R=%x: No submit" to dmesg and leaves the dirty folio hanging. Fix this by just cancelling the store of the folio if neither stream is active. (If there's no cache at the time of dirtying, we should just not mark the folio dirty).
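The fix boils down to the write path treating "no active upload stream and no active cache stream" as a reason to cancel the store rather than as an error. A simplified sketch, assuming the per-request io_streams[] array and (an assumption here) per-stream ->active flags, plus the netfs_folio_trace_cancel_store tracepoint added below; the real folio cleanup in netfslib is more involved:

#include <linux/netfs.h>
#include <trace/events/netfs.h>

/* Sketch: give up on storing a dirty folio when no stream can take it. */
static bool sketch_cancel_store(struct netfs_io_request *wreq, struct folio *folio)
{
	if (wreq->io_streams[0].active || wreq->io_streams[1].active)
		return false;		/* some stream can still take the data */

	/* Nothing can store this folio: trace it and let the caller clean up. */
	trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
	return true;
}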
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-23-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 7b26463cb98f..76bd42a96815 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -153,6 +153,7 @@ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ EM(netfs_folio_trace_abandon, "abandon") \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ + EM(netfs_folio_trace_cancel_store, "cancel-store") \ EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \ EM(netfs_folio_trace_clear_g, "clear-g") \ -- cgit v1.2.3 From 2982c8c19bab020e38da9d503aa21a3b389c53ac Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Jul 2024 20:03:07 +0100 Subject: cifs: Use iterate_and_advance*() routines directly for hashing Replace the bespoke cifs iterators of ITER_BVEC and ITER_KVEC to do hashing with iterate_and_advance_kernel() - a variant on iterate_and_advance() that only supports kernel-internal ITER_* types and not UBUF/IOVEC types. The bespoke ITER_XARRAY is left because we don't really want to be calling crypto_shash_update() under the RCU read lock for large amounts of data; besides, ITER_XARRAY is going to be phased out. Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Tom Talpey cc: Enzo Matsumiya cc: linux-cifs@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-24-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/iov_iter.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include') diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index a223370a59a7..c4aa58032faf 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -328,4 +328,51 @@ size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, return iterate_and_advance2(iter, len, priv, NULL, ustep, step); } +/** + * iterate_and_advance_kernel - Iterate over a kernel-internal iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @priv2: More data for the step functions. + * @step: Function for other iterators; given kernel addresses. + * + * Iterate over the next part of an iterator, up to the specified length. The + * buffer is presented in segments, which for kernel iteration are broken up by + * physical pages and mapped, with the mapped address being presented. + * + * [!] Note This will only handle BVEC, KVEC, FOLIOQ, XARRAY and DISCARD-type + * iterators; it will not handle UBUF or IOVEC-type iterators. + * + * A step functions, @step, must be provided, one for handling mapped kernel + * addresses and the other is given user addresses which have the potential to + * fault since no pinning is performed. + * + * The step functions are passed the address and length of the segment, @priv, + * @priv2 and the amount of data so far iterated over (which can, for example, + * be added to @priv to point to the right part of a second buffer). The step + * functions should return the amount of the segment they didn't process (ie. 0 + * indicates complete processsing). + * + * This function returns the amount of data processed (ie. 
0 means nothing was + * processed and the value of @len means processes to completion). + */ +static __always_inline +size_t iterate_and_advance_kernel(struct iov_iter *iter, size_t len, void *priv, + void *priv2, iov_step_f step) +{ + if (unlikely(iter->count < len)) + len = iter->count; + if (unlikely(!len)) + return 0; + if (iov_iter_is_bvec(iter)) + return iterate_bvec(iter, len, priv, priv2, step); + if (iov_iter_is_kvec(iter)) + return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_folioq(iter)) + return iterate_folioq(iter, len, priv, priv2, step); + if (iov_iter_is_xarray(iter)) + return iterate_xarray(iter, len, priv, priv2, step); + return iterate_discard(iter, len, priv, priv2, step); +} + #endif /* _LINUX_IOV_ITER_H */ -- cgit v1.2.3 From dfa1a7f456f10018229d0d5b3c36dd36a9b5344f Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Thu, 12 Sep 2024 20:03:06 +0800 Subject: ASoC: SOF: Intel: hda: remove common_hdmi_codec_drv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not set common_hdmi_codec_drv in SOF platform driver since no machine driver needs it. Remove member variable common_hdmi_codec_drv from snd_soc_acpi_mach_params structure. Signed-off-by: Brent Lu Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://patch.msgid.link/20240912120308.134762-6-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index c1552bc6e31c..60d3b86a4660 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -62,7 +62,6 @@ static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) * @platform: string used for HDAudio codec support * @codec_mask: used for HDAudio support * @dmic_num: number of SoC- or chipset-attached PDM digital microphones - * @common_hdmi_codec_drv: use commom HDAudio HDMI codec driver * @link_mask: SoundWire links enabled on the board * @links: array of SoundWire link _ADR descriptors, null terminated * @i2s_link_mask: I2S/TDM links enabled on the board @@ -80,7 +79,6 @@ struct snd_soc_acpi_mach_params { const char *platform; u32 codec_mask; u32 dmic_num; - bool common_hdmi_codec_drv; u32 link_mask; const struct snd_soc_acpi_link_adr *links; u32 i2s_link_mask; -- cgit v1.2.3 From c45b9a07b6392fa224ca76b89f24dae1046eef09 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 12 Sep 2024 22:30:34 +0900 Subject: Revert "firewire: core: use mutex to coordinate concurrent calls to flush completions" This reverts commit d9605d67562505e27dcc0f71af418118d3db91e5, since this commit is on the following reverted changes. 
Link: https://lore.kernel.org/r/20240912133038.238786-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 19e8c5f9537c..f815d12deda0 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -512,7 +512,6 @@ union fw_iso_callback { struct fw_iso_context { struct fw_card *card; struct work_struct work; - struct mutex flushing_completions_mutex; int type; int channel; int speed; -- cgit v1.2.3 From 4010cb1efda08ec6fd02ec5db9da909322ef352e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 12 Sep 2024 22:30:37 +0900 Subject: firewire: core: update documentation of kernel APIs for flushing completions There is a slight difference between fw_iso_context_flush_completions() and fw_iso_context_schedule_flush_completions(). This commit updates the documentations for them. Link: https://lore.kernel.org/r/20240912133038.238786-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index f815d12deda0..b632eec3ab52 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -537,9 +537,11 @@ int fw_iso_context_flush_completions(struct fw_iso_context *ctx); * @ctx: the isochronous context * * Schedule a work item on workqueue to process the isochronous context. The registered callback - * function is called in the worker if some packets have been already transferred since the last - * time. If it is required to process the context in the current context, - * fw_iso_context_flush_completions() is available instead. + * function is called by the worker when a queued packet buffer with the interrupt flag is + * completed, either after transmission in the IT context or after being filled in the IR context. + * The callback function is also called when the header buffer in the context becomes full, If it + * is required to process the context in the current context, fw_iso_context_flush_completions() is + * available instead. * * Context: Any context. */ -- cgit v1.2.3 From f1cba5212e252243a539e079813bc96fbf53e241 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 12 Sep 2024 22:30:38 +0900 Subject: firewire: core: rename cause flag of tracepoints event The flag of FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ directly causes hardIRQ request by 1394 OHCI hardware when the corresponding isochronous packet is transferred, however it is not so directly associated to hardIRQ processing itself. This commit renames the flag so that it relates to interrupt parameter of internal packet data. 
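For driver code, the practical difference documented above is which context the completion processing runs in: fw_iso_context_flush_completions() runs the callbacks synchronously in the caller's context, while the schedule variant only queues the context's work item and may be called from any context. A minimal, hypothetical usage sketch (the function and flag names in the caller are made up):

#include <linux/firewire.h>

/* Illustrative only: pick the flush variant according to calling context. */
static void finish_iso_stream(struct fw_iso_context *ctx, bool may_sleep)
{
	if (may_sleep)
		fw_iso_context_flush_completions(ctx);		/* run callbacks now */
	else
		fw_iso_context_schedule_flush_completions(ctx);	/* just queue the work item */
}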
Link: https://lore.kernel.org/r/20240912133038.238786-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index b108176deb22..ad0e0cf82b9c 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -830,13 +830,13 @@ TRACE_EVENT_CONDITION(isoc_inbound_multiple_queue, #ifndef show_cause enum fw_iso_context_completions_cause { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH = 0, - FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, + FW_ISO_CONTEXT_COMPLETIONS_CAUSE_INTERRUPT, FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, }; #define show_cause(cause) \ __print_symbolic(cause, \ { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH, "FLUSH" }, \ - { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, "IRQ" }, \ + { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_INTERRUPT, "INTERRUPT" }, \ { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, "HEADER_OVERFLOW" } \ ) #endif -- cgit v1.2.3 From ede5bbe488d162bcd572880e58f9044c9df84050 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:27 +0300 Subject: ARM: ep93xx: add regmap aux_dev The following driver's should be instantiated by ep93xx syscon driver: - reboot - pinctrl - clock They all require access to DEVCFG register with a shared lock held, to avoid conflict writing to swlocked parts of DEVCFG. Provide common resources such as base, regmap and spinlock via auxiliary bus framework. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Linus Walleij Reviewed-by: Stephen Boyd Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/soc/cirrus/ep93xx.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 56fbe2dc59b1..a27447971302 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -3,6 +3,18 @@ #define _SOC_EP93XX_H struct platform_device; +struct regmap; +struct spinlock_t; + +enum ep93xx_soc_model { + EP93XX_9301_SOC, + EP93XX_9307_SOC, + EP93XX_9312_SOC, +}; + +#include +#include +#include #define EP93XX_CHIP_REV_D0 3 #define EP93XX_CHIP_REV_D1 4 @@ -10,6 +22,20 @@ struct platform_device; #define EP93XX_CHIP_REV_E1 6 #define EP93XX_CHIP_REV_E2 7 +struct ep93xx_regmap_adev { + struct auxiliary_device adev; + struct regmap *map; + void __iomem *base; + spinlock_t *lock; + void (*write)(struct regmap *map, spinlock_t *lock, unsigned int reg, + unsigned int val); + void (*update_bits)(struct regmap *map, spinlock_t *lock, + unsigned int reg, unsigned int mask, unsigned int val); +}; + +#define to_ep93xx_regmap_adev(_adev) \ + container_of((_adev), struct ep93xx_regmap_adev, adev) + #ifdef CONFIG_ARCH_EP93XX int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); void ep93xx_pwm_release_gpio(struct platform_device *pdev); -- cgit v1.2.3 From eeb3dd5b32e68989bae1a3d4420204cf3a90ce73 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:31 +0300 Subject: dt-bindings: soc: Add Cirrus EP93xx Add device tree bindings for the Cirrus Logic EP93xx SoC. 
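For the syscon clients listed above (reboot, pinctrl, clock), consuming the ep93xx_regmap_adev is just a matter of converting the auxiliary_device they are probed with back into the wrapper and using the shared regmap, with DEVCFG-style updates going through the provided helpers. A hedged sketch of such a client; the driver, register offset and mask are placeholders, not one of the real ep93xx drivers:

#include <linux/auxiliary_bus.h>
#include <linux/regmap.h>
#include <linux/soc/cirrus/ep93xx.h>

/* Sketch of an auxiliary-bus client probe. */
static int ep93xx_client_probe(struct auxiliary_device *adev,
			       const struct auxiliary_device_id *id)
{
	struct ep93xx_regmap_adev *rdev = to_ep93xx_regmap_adev(adev);
	unsigned int val;
	int ret;

	/* Plain reads can go straight through the shared regmap... */
	ret = regmap_read(rdev->map, 0x00, &val);
	if (ret)
		return ret;

	/* ...while updates go through the provided helpers, which take the
	 * shared spinlock so concurrent clients don't race on DEVCFG. */
	rdev->update_bits(rdev->map, rdev->lock, 0x00, 0x1, 0x1);
	return 0;
}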
Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Stephen Boyd Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/dt-bindings/clock/cirrus,ep9301-syscon.h | 46 ++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/dt-bindings/clock/cirrus,ep9301-syscon.h (limited to 'include') diff --git a/include/dt-bindings/clock/cirrus,ep9301-syscon.h b/include/dt-bindings/clock/cirrus,ep9301-syscon.h new file mode 100644 index 000000000000..6bb8f532e7d0 --- /dev/null +++ b/include/dt-bindings/clock/cirrus,ep9301-syscon.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +#ifndef DT_BINDINGS_CIRRUS_EP93XX_CLOCK_H +#define DT_BINDINGS_CIRRUS_EP93XX_CLOCK_H + +#define EP93XX_CLK_PLL1 0 +#define EP93XX_CLK_PLL2 1 + +#define EP93XX_CLK_FCLK 2 +#define EP93XX_CLK_HCLK 3 +#define EP93XX_CLK_PCLK 4 + +#define EP93XX_CLK_UART 5 +#define EP93XX_CLK_SPI 6 +#define EP93XX_CLK_PWM 7 +#define EP93XX_CLK_USB 8 + +#define EP93XX_CLK_M2M0 9 +#define EP93XX_CLK_M2M1 10 + +#define EP93XX_CLK_M2P0 11 +#define EP93XX_CLK_M2P1 12 +#define EP93XX_CLK_M2P2 13 +#define EP93XX_CLK_M2P3 14 +#define EP93XX_CLK_M2P4 15 +#define EP93XX_CLK_M2P5 16 +#define EP93XX_CLK_M2P6 17 +#define EP93XX_CLK_M2P7 18 +#define EP93XX_CLK_M2P8 19 +#define EP93XX_CLK_M2P9 20 + +#define EP93XX_CLK_UART1 21 +#define EP93XX_CLK_UART2 22 +#define EP93XX_CLK_UART3 23 + +#define EP93XX_CLK_ADC 24 +#define EP93XX_CLK_ADC_EN 25 + +#define EP93XX_CLK_KEYPAD 26 + +#define EP93XX_CLK_VIDEO 27 + +#define EP93XX_CLK_I2S_MCLK 28 +#define EP93XX_CLK_I2S_SCLK 29 +#define EP93XX_CLK_I2S_LRCLK 30 + +#endif /* DT_BINDINGS_CIRRUS_EP93XX_CLOCK_H */ -- cgit v1.2.3 From 2e7f55ce430240a5547b8a94b4c532fc8c20b18b Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:34 +0300 Subject: dmaengine: cirrus: Convert to DT for Cirrus EP93xx Convert Cirrus EP93xx DMA to device tree usage: - add OF ID match table with data - add of_probe for device tree - add xlate for m2m/m2p - drop subsys_initcall code - drop platform probe - drop platform structs usage >From now on it only supports device tree probing. Co-developed-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- include/linux/platform_data/dma-ep93xx.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h index eb9805bb3fe8..9ec5cdd5a1eb 100644 --- a/include/linux/platform_data/dma-ep93xx.h +++ b/include/linux/platform_data/dma-ep93xx.h @@ -3,8 +3,11 @@ #define __ASM_ARCH_DMA_H #include +#include #include #include +#include +#include /* * M2P channels. 
@@ -70,6 +73,9 @@ struct ep93xx_dma_platform_data { static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) { + if (device_is_compatible(chan->device->dev, "cirrus,ep9301-dma-m2p")) + return true; + return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); } -- cgit v1.2.3 From b3ab5787e7acb02874ae86cbe13969d4dd01a585 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:48 +0300 Subject: input: keypad: ep93xx: add DT support for Cirrus EP93xx - drop flags, they were not used anyway - add OF ID match table - process "autorepeat", "debounce-delay-ms", prescale from device tree - drop platform data usage and it's header - keymap goes from device tree now on Signed-off-by: Nikita Shubin Acked-by: Dmitry Torokhov Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/soc/cirrus/ep93xx.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index a27447971302..8942bfaf1545 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -41,8 +41,6 @@ int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); void ep93xx_pwm_release_gpio(struct platform_device *pdev); int ep93xx_ide_acquire_gpio(struct platform_device *pdev); void ep93xx_ide_release_gpio(struct platform_device *pdev); -int ep93xx_keypad_acquire_gpio(struct platform_device *pdev); -void ep93xx_keypad_release_gpio(struct platform_device *pdev); int ep93xx_i2s_acquire(void); void ep93xx_i2s_release(void); unsigned int ep93xx_chip_revision(void); @@ -52,8 +50,6 @@ static inline int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) { return static inline void ep93xx_pwm_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} -static inline int ep93xx_keypad_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_keypad_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_i2s_acquire(void) { return 0; } static inline void ep93xx_i2s_release(void) {} static inline unsigned int ep93xx_chip_revision(void) { return 0; } -- cgit v1.2.3 From a48ac3dc569771c18fcafbc8351d820cc343c54a Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:58 +0300 Subject: pwm: ep93xx: drop legacy pinctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop legacy gpio request/free since we are using pinctrl for this now. 
Signed-off-by: Nikita Shubin Acked-by: Uwe Kleine-König Acked-by: Thierry Reding Acked-by: Linus Walleij Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/soc/cirrus/ep93xx.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 8942bfaf1545..f6376edc1b33 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -37,8 +37,6 @@ struct ep93xx_regmap_adev { container_of((_adev), struct ep93xx_regmap_adev, adev) #ifdef CONFIG_ARCH_EP93XX -int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); -void ep93xx_pwm_release_gpio(struct platform_device *pdev); int ep93xx_ide_acquire_gpio(struct platform_device *pdev); void ep93xx_ide_release_gpio(struct platform_device *pdev); int ep93xx_i2s_acquire(void); @@ -46,8 +44,6 @@ void ep93xx_i2s_release(void); unsigned int ep93xx_chip_revision(void); #else -static inline int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_pwm_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_i2s_acquire(void) { return 0; } -- cgit v1.2.3 From a632229be268dde8f6d407638b5cfba8b78201d6 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:59 +0300 Subject: ata: pata_ep93xx: remove legacy pinctrl use Drop legacy acquire/release since we are using pinctrl for this now. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Sergey Shtylyov Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Reviewed-by: Mark Brown Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Acked-by: Damien Le Moal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/soc/cirrus/ep93xx.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index f6376edc1b33..142c33a2d7db 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -37,15 +37,11 @@ struct ep93xx_regmap_adev { container_of((_adev), struct ep93xx_regmap_adev, adev) #ifdef CONFIG_ARCH_EP93XX -int ep93xx_ide_acquire_gpio(struct platform_device *pdev); -void ep93xx_ide_release_gpio(struct platform_device *pdev); int ep93xx_i2s_acquire(void); void ep93xx_i2s_release(void); unsigned int ep93xx_chip_revision(void); #else -static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_i2s_acquire(void) { return 0; } static inline void ep93xx_i2s_release(void) {} static inline unsigned int ep93xx_chip_revision(void) { return 0; } -- cgit v1.2.3 From e5ef574dda702e62081d5c7991949cbdb7f65c08 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:11:00 +0300 Subject: ARM: ep93xx: delete all boardfiles Delete the ep93xx board files. 
Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/platform_data/spi-ep93xx.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 include/linux/platform_data/spi-ep93xx.h (limited to 'include') diff --git a/include/linux/platform_data/spi-ep93xx.h b/include/linux/platform_data/spi-ep93xx.h deleted file mode 100644 index b439f2a896e0..000000000000 --- a/include/linux/platform_data/spi-ep93xx.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_EP93XX_SPI_H -#define __ASM_MACH_EP93XX_SPI_H - -struct spi_device; - -/** - * struct ep93xx_spi_info - EP93xx specific SPI descriptor - * @use_dma: use DMA for the transfers - */ -struct ep93xx_spi_info { - bool use_dma; -}; - -#endif /* __ASM_MACH_EP93XX_SPI_H */ -- cgit v1.2.3 From 43528a72526152f17a918973b3b8b6f383b90f98 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:11:01 +0300 Subject: ARM: ep93xx: soc: drop defines Remove unnecessary defines, as we dropped board files. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/platform_data/eth-ep93xx.h | 10 --------- include/linux/platform_data/keypad-ep93xx.h | 32 ----------------------------- include/linux/soc/cirrus/ep93xx.h | 13 ------------ 3 files changed, 55 deletions(-) delete mode 100644 include/linux/platform_data/eth-ep93xx.h delete mode 100644 include/linux/platform_data/keypad-ep93xx.h (limited to 'include') diff --git a/include/linux/platform_data/eth-ep93xx.h b/include/linux/platform_data/eth-ep93xx.h deleted file mode 100644 index 8eef637a804d..000000000000 --- a/include/linux/platform_data/eth-ep93xx.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_PLATFORM_DATA_ETH_EP93XX -#define _LINUX_PLATFORM_DATA_ETH_EP93XX - -struct ep93xx_eth_data { - unsigned char dev_addr[6]; - unsigned char phy_id; -}; - -#endif diff --git a/include/linux/platform_data/keypad-ep93xx.h b/include/linux/platform_data/keypad-ep93xx.h deleted file mode 100644 index 3054fced8509..000000000000 --- a/include/linux/platform_data/keypad-ep93xx.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __KEYPAD_EP93XX_H -#define __KEYPAD_EP93XX_H - -struct matrix_keymap_data; - -/* flags for the ep93xx_keypad driver */ -#define EP93XX_KEYPAD_DISABLE_3_KEY (1<<0) /* disable 3-key reset */ -#define EP93XX_KEYPAD_DIAG_MODE (1<<1) /* diagnostic mode */ -#define EP93XX_KEYPAD_BACK_DRIVE (1<<2) /* back driving mode */ -#define EP93XX_KEYPAD_TEST_MODE (1<<3) /* scan only column 0 */ -#define EP93XX_KEYPAD_AUTOREPEAT (1<<4) /* enable key autorepeat */ - -/** - * struct ep93xx_keypad_platform_data - platform specific device structure - * @keymap_data: pointer to &matrix_keymap_data - * @debounce: debounce start count; terminal count is 0xff - * @prescale: row/column counter pre-scaler load value - * @flags: see above - */ -struct ep93xx_keypad_platform_data { - struct matrix_keymap_data *keymap_data; - unsigned int debounce; - unsigned int prescale; - unsigned int flags; - unsigned int clk_rate; -}; - -#define EP93XX_MATRIX_ROWS (8) -#define EP93XX_MATRIX_COLS (8) - -#endif /* __KEYPAD_EP93XX_H */ diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 142c33a2d7db..3e6cf2b25a97 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -2,7 +2,6 @@ #ifndef _SOC_EP93XX_H 
#define _SOC_EP93XX_H -struct platform_device; struct regmap; struct spinlock_t; @@ -36,16 +35,4 @@ struct ep93xx_regmap_adev { #define to_ep93xx_regmap_adev(_adev) \ container_of((_adev), struct ep93xx_regmap_adev, adev) -#ifdef CONFIG_ARCH_EP93XX -int ep93xx_i2s_acquire(void); -void ep93xx_i2s_release(void); -unsigned int ep93xx_chip_revision(void); - -#else -static inline int ep93xx_i2s_acquire(void) { return 0; } -static inline void ep93xx_i2s_release(void) {} -static inline unsigned int ep93xx_chip_revision(void) { return 0; } - -#endif - #endif -- cgit v1.2.3 From a015b1828653b591de0aa5303c0dbc4235935f94 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:11:03 +0300 Subject: dmaengine: cirrus: remove platform code Remove DMA platform header, from now on we use device tree for DMA clients. Acked-by: Vinod Koul Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- include/linux/platform_data/dma-ep93xx.h | 100 ------------------------------- 1 file changed, 100 deletions(-) delete mode 100644 include/linux/platform_data/dma-ep93xx.h (limited to 'include') diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h deleted file mode 100644 index 9ec5cdd5a1eb..000000000000 --- a/include/linux/platform_data/dma-ep93xx.h +++ /dev/null @@ -1,100 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_DMA_H -#define __ASM_ARCH_DMA_H - -#include -#include -#include -#include -#include -#include - -/* - * M2P channels. - * - * Note that these values are also directly used for setting the PPALLOC - * register. - */ -#define EP93XX_DMA_I2S1 0 -#define EP93XX_DMA_I2S2 1 -#define EP93XX_DMA_AAC1 2 -#define EP93XX_DMA_AAC2 3 -#define EP93XX_DMA_AAC3 4 -#define EP93XX_DMA_I2S3 5 -#define EP93XX_DMA_UART1 6 -#define EP93XX_DMA_UART2 7 -#define EP93XX_DMA_UART3 8 -#define EP93XX_DMA_IRDA 9 -/* M2M channels */ -#define EP93XX_DMA_SSP 10 -#define EP93XX_DMA_IDE 11 - -/** - * struct ep93xx_dma_data - configuration data for the EP93xx dmaengine - * @port: peripheral which is requesting the channel - * @direction: TX/RX channel - * @name: optional name for the channel, this is displayed in /proc/interrupts - * - * This information is passed as private channel parameter in a filter - * function. Note that this is only needed for slave/cyclic channels. For - * memcpy channels %NULL data should be passed. - */ -struct ep93xx_dma_data { - int port; - enum dma_transfer_direction direction; - const char *name; -}; - -/** - * struct ep93xx_dma_chan_data - platform specific data for a DMA channel - * @name: name of the channel, used for getting the right clock for the channel - * @base: mapped registers - * @irq: interrupt number used by this channel - */ -struct ep93xx_dma_chan_data { - const char *name; - void __iomem *base; - int irq; -}; - -/** - * struct ep93xx_dma_platform_data - platform data for the dmaengine driver - * @channels: array of channels which are passed to the driver - * @num_channels: number of channels in the array - * - * This structure is passed to the DMA engine driver via platform data. For - * M2P channels, contract is that even channels are for TX and odd for RX. - * There is no requirement for the M2M channels. 
- */ -struct ep93xx_dma_platform_data { - struct ep93xx_dma_chan_data *channels; - size_t num_channels; -}; - -static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) -{ - if (device_is_compatible(chan->device->dev, "cirrus,ep9301-dma-m2p")) - return true; - - return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); -} - -/** - * ep93xx_dma_chan_direction - returns direction the channel can be used - * @chan: channel - * - * This function can be used in filter functions to find out whether the - * channel supports given DMA direction. Only M2P channels have such - * limitation, for M2M channels the direction is configurable. - */ -static inline enum dma_transfer_direction -ep93xx_dma_chan_direction(struct dma_chan *chan) -{ - if (!ep93xx_dma_chan_is_m2p(chan)) - return DMA_TRANS_NONE; - - /* even channels are for TX, odd for RX */ - return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; -} - -#endif /* __ASM_ARCH_DMA_H */ -- cgit v1.2.3 From 8d8081cecfb9940beeb4a8a700db34e615a96056 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 5 Sep 2024 15:35:46 -0700 Subject: cxl: Move mailbox related bits to the same context Create a new 'struct cxl_mailbox' and move all mailbox related bits to it. This allows isolation of all CXL mailbox data in order to export some of the calls to external kernel callers and avoid exporting of CXL driver specific bits such has device states. The allocation of 'struct cxl_mailbox' is also split out with cxl_mailbox_init() so the mailbox can be created independently. Reviewed-by: Jonathan Cameron Reviewed-by: Alejandro Lucero Reviewed-by: Fan Ni Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20240905223711.1990186-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/cxl/mailbox.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/cxl/mailbox.h (limited to 'include') diff --git a/include/cxl/mailbox.h b/include/cxl/mailbox.h new file mode 100644 index 000000000000..bacd111e75f1 --- /dev/null +++ b/include/cxl/mailbox.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation. */ +#ifndef __CXL_MBOX_H__ +#define __CXL_MBOX_H__ +#include + +struct cxl_mbox_cmd; + +/** + * struct cxl_mailbox - context for CXL mailbox operations + * @host: device that hosts the mailbox + * @payload_size: Size of space for payload + * (CXL 3.1 8.2.8.4.3 Mailbox Capabilities Register) + * @mbox_mutex: mutex protects device mailbox and firmware + * @mbox_wait: rcuwait for mailbox + * @mbox_send: @dev specific transport for transmitting mailbox commands + */ +struct cxl_mailbox { + struct device *host; + size_t payload_size; + struct mutex mbox_mutex; /* lock to protect mailbox context */ + struct rcuwait mbox_wait; + int (*mbox_send)(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd); +}; + +int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host); + +#endif -- cgit v1.2.3 From 7cc2a6eadcd7a5aa36ac63e6659f5c6138c7f4d2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Sep 2024 13:56:08 -0600 Subject: io_uring: add IORING_REGISTER_COPY_BUFFERS method Buffers can get registered with io_uring, which allows to skip the repeated pin_pages, unpin/unref pages for each O_DIRECT operation. This reduces the overhead of O_DIRECT IO. However, registrering buffers can take some time. 
Normally this isn't an issue as it's done at initialization time (and hence less critical), but for cases where rings can be created and destroyed as part of an IO thread pool, registering the same buffers for multiple rings become a more time sensitive proposition. As an example, let's say an application has an IO memory pool of 500G. Initial registration takes: Got 500 huge pages (each 1024MB) Registered 500 pages in 409 msec or about 0.4 seconds. If we go higher to 900 1GB huge pages being registered: Registered 900 pages in 738 msec which is, as expected, a fully linear scaling. Rather than have each ring pin/map/register the same buffer pool, provide an io_uring_register(2) opcode to simply duplicate the buffers that are registered with another ring. Adding the same 900GB of registered buffers to the target ring can then be accomplished in: Copied 900 pages in 17 usec While timing differs a bit, this provides around a 25,000-40,000x speedup for this use case. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a275f91d2ac0..9dc5bb428c8a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -609,6 +609,9 @@ enum io_uring_register_op { IORING_REGISTER_CLOCK = 29, + /* copy registered buffers from source ring to current ring */ + IORING_REGISTER_COPY_BUFFERS = 30, + /* this goes last */ IORING_REGISTER_LAST, @@ -694,6 +697,16 @@ struct io_uring_clock_register { __u32 __resv[3]; }; +enum { + IORING_REGISTER_SRC_REGISTERED = 1, +}; + +struct io_uring_copy_buffers { + __u32 src_fd; + __u32 flags; + __u32 pad[6]; +}; + struct io_uring_buf { __u64 addr; __u32 len; -- cgit v1.2.3 From 433d7ce2d86d21274838c9e8c796f4232cd13cdb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 6 Aug 2024 15:38:12 -0700 Subject: security,bpf: constify struct path in bpf_token_create() LSM hook There is no reason why struct path pointer shouldn't be const-qualified when being passed into bpf_token_create() LSM hook. Add that const. 
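As a rough sketch (not part of this patch), an LSM implementation of the hook simply gains the const qualifier; the function name below is hypothetical:

    /* illustrative hook body: 'path' is now read-only */
    static int example_bpf_token_create(struct bpf_token *token,
                                        union bpf_attr *attr,
                                        const struct path *path)
    {
            /* inspecting path->dentry or path->mnt is fine; writes
             * through 'path' no longer compile */
            return 0;
    }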
Acked-by: Paul Moore (LSM/SELinux) Suggested-by: Al Viro Signed-off-by: Andrii Nakryiko --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 855db460e08b..462b55378241 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -431,7 +431,7 @@ LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token) LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) diff --git a/include/linux/security.h b/include/linux/security.h index 1390f1efb4f0..31523a2c71c4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2137,7 +2137,7 @@ extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token); extern void security_bpf_prog_free(struct bpf_prog *prog); extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path); + const struct path *path); extern void security_bpf_token_free(struct bpf_token *token); extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); extern int security_bpf_token_capable(const struct bpf_token *token, int cap); @@ -2177,7 +2177,7 @@ static inline void security_bpf_prog_free(struct bpf_prog *prog) { } static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) { return 0; } -- cgit v1.2.3 From b2155807893aac40f1a1cdf43f7fcc270cbfc05a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Sep 2024 17:21:42 -0700 Subject: uapi: libc-compat: remove ipx leftovers The uAPI headers for IPX were deleted 3 years ago in commit 6c9b40844751 ("net: Remove net/ipx.h and uapi/linux/ipx.h header files") Delete the leftover defines from libc-compat.h Link: https://patch.msgid.link/20240911002142.1508694-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/libc-compat.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 8254c937c9f4..0eca95ccb41e 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -140,25 +140,6 @@ #endif /* _NETINET_IN_H */ -/* Coordinate with glibc netipx/ipx.h header. 
*/ -#if defined(__NETIPX_IPX_H) - -#define __UAPI_DEF_SOCKADDR_IPX 0 -#define __UAPI_DEF_IPX_ROUTE_DEFINITION 0 -#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 0 -#define __UAPI_DEF_IPX_CONFIG_DATA 0 -#define __UAPI_DEF_IPX_ROUTE_DEF 0 - -#else /* defined(__NETIPX_IPX_H) */ - -#define __UAPI_DEF_SOCKADDR_IPX 1 -#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 -#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 -#define __UAPI_DEF_IPX_CONFIG_DATA 1 -#define __UAPI_DEF_IPX_ROUTE_DEF 1 - -#endif /* defined(__NETIPX_IPX_H) */ - /* Definitions for xattr.h */ #if defined(_SYS_XATTR_H) #define __UAPI_DEF_XATTR 0 @@ -240,23 +221,6 @@ #define __UAPI_DEF_IP6_MTUINFO 1 #endif -/* Definitions for ipx.h */ -#ifndef __UAPI_DEF_SOCKADDR_IPX -#define __UAPI_DEF_SOCKADDR_IPX 1 -#endif -#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION -#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 -#endif -#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION -#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 -#endif -#ifndef __UAPI_DEF_IPX_CONFIG_DATA -#define __UAPI_DEF_IPX_CONFIG_DATA 1 -#endif -#ifndef __UAPI_DEF_IPX_ROUTE_DEF -#define __UAPI_DEF_IPX_ROUTE_DEF 1 -#endif - /* Definitions for xattr.h */ #ifndef __UAPI_DEF_XATTR #define __UAPI_DEF_XATTR 1 -- cgit v1.2.3 From da2f660b3ba1be33310452959ab72d1d7ce39350 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 11 Sep 2024 13:17:46 -0700 Subject: net/mlx5: fs, make get_root_namespace API function As preparation for HW Steering support, where the function get_root_namespace() is needed to get root FDB, make it an API function and rename it to mlx5_get_root_namespace(). Reviewed-by: Yevgeny Kliteynik Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Link: https://patch.msgid.link/20240911201757.1505453-5-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 3fb428ce7d1c..b744e554f014 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -342,4 +342,7 @@ void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, struct mlx5_pkt_reformat *reformat); u32 mlx5_flow_table_id(struct mlx5_flow_table *ft); + +struct mlx5_flow_root_namespace * +mlx5_get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type); #endif -- cgit v1.2.3 From 9947204cdad97d22d171039019a4aad4d6899cdd Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 11 Sep 2024 13:17:51 -0700 Subject: net/mlx5: Add device cap for supporting hot reset in sync reset flow New devices with new FW can support sync reset for firmware activate using hot reset. Add capability for supporting it and add MFRL field to query from FW which type of PCI reset method to use while handling sync reset events. 
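A hedged sketch (not part of the patch) of how a driver could consume the new capability bit and MFRL field; the helper name and the mfrl_out query buffer are assumptions:

    static bool mlx5_fw_reset_uses_hot_reset(struct mlx5_core_dev *dev,
                                             void *mfrl_out)
    {
            if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method))
                    return false;

            return MLX5_GET(mfrl_reg, mfrl_out, pci_reset_req_method) ==
                   MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET;
    }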
Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20240911201757.1505453-10-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2045575b70d4..620a5c305123 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1856,7 +1856,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_328[0x2]; u8 relaxed_ordering_read[0x1]; u8 log_max_pd[0x5]; - u8 reserved_at_330[0x6]; + u8 reserved_at_330[0x5]; + u8 pcie_reset_using_hotreset_method[0x1]; u8 pci_sync_for_fw_update_with_driver_unload[0x1]; u8 vnic_env_cnt_steering_fail[0x1]; u8 vport_counter_local_loopback[0x1]; @@ -11188,6 +11189,11 @@ struct mlx5_ifc_mcda_reg_bits { u8 data[][0x20]; }; +enum { + MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE = 0, + MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET = 1, +}; + enum { MLX5_MFRL_REG_RESET_STATE_IDLE = 0, MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1, @@ -11215,7 +11221,8 @@ struct mlx5_ifc_mfrl_reg_bits { u8 pci_sync_for_fw_update_start[0x1]; u8 pci_sync_for_fw_update_resp[0x2]; u8 rst_type_sel[0x3]; - u8 reserved_at_28[0x4]; + u8 pci_reset_req_method[0x3]; + u8 reserved_at_2b[0x1]; u8 reset_state[0x4]; u8 reset_type[0x8]; u8 reset_level[0x8]; -- cgit v1.2.3 From 5bd877093fd0b2e9e5f0c03b466669f761b5849c Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 11 Sep 2024 13:17:55 -0700 Subject: net/mlx5: Add NOT_READY command return status Add a new command status MLX5_CMD_STAT_NOT_READY to handle cases where the firmware is not ready. Signed-off-by: Shay Drory Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Link: https://patch.msgid.link/20240911201757.1505453-14-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index a94bc9e3af96..d0f7d1f36c5e 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1449,6 +1449,7 @@ enum { MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, MLX5_CMD_STAT_BAD_RES_ERR = 0x5, MLX5_CMD_STAT_RES_BUSY = 0x6, + MLX5_CMD_STAT_NOT_READY = 0x7, MLX5_CMD_STAT_LIM_ERR = 0x8, MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, MLX5_CMD_STAT_IX_ERR = 0xa, -- cgit v1.2.3 From 52fa3b6532ec6f3a1e39bf869b304d3560dd983b Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 13 Sep 2024 03:28:24 +0000 Subject: memory-provider: fix compilation issue without SYSFS When CONFIG_SYSFS is not set, the kernel fails to compile: net/core/page_pool_user.c:368:45: error: implicit declaration of function 'get_netdev_rx_queue_index' [-Werror=implicit-function-declaration] 368 | if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~ When CONFIG_SYSFS is not set, get_netdev_rx_queue_index() is not defined as well. Fix by removing the ifdef around get_netdev_rx_queue_index(). It is not needed anymore after commit e817f85652c1 ("xdp: generic XDP handling of xdp_rxq_info") removed most of the CONFIG_SYSFS ifdefs. 
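A minimal sketch (not from the patch) showing that callers now build the same way with or without CONFIG_SYSFS; the function name is illustrative:

    static unsigned int example_queue_index(struct net_device *dev,
                                            unsigned int i)
    {
            struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, i);

            /* previously only available under CONFIG_SYSFS */
            return get_netdev_rx_queue_index(rxq);
    }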
Fixes: 0f9214046893 ("memory-provider: dmabuf devmem memory provider") Cc: Matthieu Baerts (NGI0) Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20240913032824.2117095-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/netdev_rx_queue.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index ac34f5fb4f71..596836abf7bf 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -45,7 +45,6 @@ __netif_get_rx_queue(struct net_device *dev, unsigned int rxq) return dev->_rx + rxq; } -#ifdef CONFIG_SYSFS static inline unsigned int get_netdev_rx_queue_index(struct netdev_rx_queue *queue) { @@ -55,7 +54,6 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue) BUG_ON(index >= dev->num_rx_queues); return index; } -#endif int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); -- cgit v1.2.3 From 907936b6f4e630718cc31ddea79cc76a3e32080a Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 9 Sep 2024 13:05:04 +0300 Subject: net/mlx5: Handle memory scheme ODP capabilities When running over new FW that supports the new memory scheme ODP, set the cap in the FW to signal the FW we are working in the new scheme. In the memory scheme ODP the per_transport_service capabilities are RO for the driver so we skip their setting. Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909100504.29797-9-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 154095256d0d..57c9b18c3adb 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1389,9 +1389,13 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) -#define MLX5_CAP_ODP_SCHEME(mdev, cap) \ - MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ - transport_page_fault_scheme_cap.cap) +#define MLX5_CAP_ODP_SCHEME(mdev, cap) \ + (MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + mem_page_fault) ? \ + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + memory_page_fault_scheme_cap.cap) : \ + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + transport_page_fault_scheme_cap.cap)) #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap) -- cgit v1.2.3 From 8d159eb2117b2e3697a31785662b653938f007cb Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Mon, 9 Sep 2024 20:30:23 +0300 Subject: RDMA/mlx5: Use IB set_netdev and get_netdev functions The IB layer provides a common interface to store and get net devices associated to an IB device port (ib_device_set_netdev() and ib_device_get_netdev()). Previously, mlx5_ib stored and managed the associated net devices internally. Replace internal net device management in mlx5_ib with ib_device_set_netdev() when attaching/detaching a net device and ib_device_get_netdev() when retrieving the net device. Export ib_device_get_netdev(). For mlx5 representors/PFs/VFs and lag creation we replace the netdev assignments with the IB set/get netdev functions. In active-backup mode lag the active slave net device is stored in the lag itself. 
To assure the net device stored in a lag bond IB device is the active slave we implement the following: - mlx5_core: when modifying the slave of a bond we send the internal driver event MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE. - mlx5_ib: when catching the event call ib_device_set_netdev() This patch also ensures the correct IB events are sent in switchdev lag. While at it, when in multiport eswitch mode, only a single IB device is created for all ports. The said IB device will receive all netdev events of its VFs once loaded, thus to avoid overwriting the mapping of PF IB device to PF netdev, ignore NETDEV_REGISTER events if the ib device has already been mapped to a netdev. Signed-off-by: Chiara Meiohas Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909173025.30422-6-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/driver.h | 2 +- include/rdma/ib_verbs.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 57c9b18c3adb..dac33cfe9c0c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -371,6 +371,7 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, MLX5_DRIVER_EVENT_AFFILIATION_DONE, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a96438ded15f..46a7a3d11048 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -643,6 +643,7 @@ struct mlx5_priv { struct mlx5_sf_hw_table *sf_hw_table; struct mlx5_sf_table *sf_table; #endif + struct blocking_notifier_head lag_nh; }; enum mlx5_device_state { @@ -1181,7 +1182,6 @@ bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); -struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a1dcf812d787..aa8ede439905 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4453,6 +4453,8 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port, const struct sockaddr *addr); int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, unsigned int port); +struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, + u32 port); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata); -- cgit v1.2.3 From 9cbed5aab5aeea420d0aa945733bf608449d44fb Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Mon, 9 Sep 2024 20:30:24 +0300 Subject: RDMA/nldev: Add support for RDMA monitoring Introduce a new netlink command to allow rdma event monitoring. The rdma events supported now are IB device registration/unregistration and net device attachment/detachment. 
Example output of rdma monitor and the commands which trigger the events: $ rdma monitor $ rmmod mlx5_ib [UNREGISTER] dev 1 rocep8s0f1 [UNREGISTER] dev 0 rocep8s0f0 $ modprobe mlx5_ib [REGISTER] dev 2 mlx5_0 [NETDEV_ATTACH] dev 2 mlx5_0 port 1 netdev 4 eth2 [REGISTER] dev 3 mlx5_1 [NETDEV_ATTACH] dev 3 mlx5_1 port 1 netdev 5 eth3 $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev [UNREGISTER] dev 2 rocep8s0f0 [REGISTER] dev 4 mlx5_0 [NETDEV_ATTACH] dev 4 mlx5_0 port 30 netdev 4 eth2 $ echo 4 > /sys/class/net/eth2/device/sriov_numvfs [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 2 netdev 7 eth4 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 3 netdev 8 eth5 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 4 netdev 9 eth6 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 5 netdev 10 eth7 [REGISTER] dev 5 mlx5_0 [NETDEV_ATTACH] dev 5 mlx5_0 port 1 netdev 11 eth8 [REGISTER] dev 6 mlx5_0 [NETDEV_ATTACH] dev 6 mlx5_0 port 1 netdev 12 eth9 [REGISTER] dev 7 mlx5_0 [NETDEV_ATTACH] dev 7 mlx5_0 port 1 netdev 13 eth10 [REGISTER] dev 8 mlx5_0 [NETDEV_ATTACH] dev 8 mlx5_0 port 1 netdev 14 eth11 $ echo 0 > /sys/class/net/eth2/device/sriov_numvfs [UNREGISTER] dev 5 rocep8s0f0v0 [UNREGISTER] dev 6 rocep8s0f0v1 [UNREGISTER] dev 7 rocep8s0f0v2 [UNREGISTER] dev 8 rocep8s0f0v3 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 2 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 3 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 4 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 5 Signed-off-by: Chiara Meiohas Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909173025.30422-7-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/rdma/rdma_netlink.h | 12 ++++++++++++ include/uapi/rdma/rdma_netlink.h | 15 +++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index c2a79aeee113..326deaf56d5d 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -6,6 +6,8 @@ #include #include +struct ib_device; + enum { RDMA_NLDEV_ATTR_EMPTY_STRING = 1, RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, @@ -110,6 +112,16 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb, */ bool rdma_nl_chk_listeners(unsigned int group); +/** + * Prepare and send an event message + * @ib: the IB device which triggered the event + * @port_num: the port number which triggered the event - 0 if unused + * @type: the event type + * Returns 0 on success or a negative error code + */ +int rdma_nl_notify_event(struct ib_device *ib, u32 port_num, + enum rdma_nl_notify_event_type type); + struct rdma_link_ops { struct list_head list; const char *type; diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 2f37568f5556..5f9636d26050 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -15,6 +15,7 @@ enum { enum { RDMA_NL_GROUP_IWPM = 2, RDMA_NL_GROUP_LS, + RDMA_NL_GROUP_NOTIFY, RDMA_NL_NUM_GROUPS }; @@ -305,6 +306,8 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_DELDEV, + RDMA_NLDEV_CMD_MONITOR, + RDMA_NLDEV_NUM_OPS }; @@ -574,6 +577,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, /* u8 */ + RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */ + /* * Always the end */ @@ -624,4 +629,14 @@ enum rdma_nl_name_assign_type { RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */ }; +/* + * Supported rdma monitoring event types. 
+ */ +enum rdma_nl_notify_event_type { + RDMA_REGISTER_EVENT, + RDMA_UNREGISTER_EVENT, + RDMA_NETDEV_ATTACH_EVENT, + RDMA_NETDEV_DETACH_EVENT, +}; + #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 12fb1153c53bf9b53e299c9775b84fa7838640f7 Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Mon, 9 Sep 2024 20:30:25 +0300 Subject: RDMA/nldev: Expose whether RDMA monitoring is supported Extend the "rdma sys" command to display whether RDMA monitoring is supported. RDMA monitoring is not supported in mlx4 because it does not use the ib_device_set_netdev() API, which sends the RDMA events. Example output for kernel where monitoring is supported: $ rdma sys show netns shared privileged-qkey off monitor on copy-on-fork on Example output for kernel where monitoring is not supported: $ rdma sys show netns shared privileged-qkey off monitor off copy-on-fork on Signed-off-by: Chiara Meiohas Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909173025.30422-8-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 5f9636d26050..39be09c0ffbb 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -579,6 +579,7 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */ + RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, /* u8 */ /* * Always the end */ -- cgit v1.2.3 From 96f8052822e03c6f49b6b28fc1d6e5e0522ecbb9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 12 Sep 2024 15:39:54 -0700 Subject: locking/mutex: Define mutex_init() once With CONFIG_PREEMPT_RT disabled __mutex_init() is a function. With CONFIG_PREEMPT_RT enabled, __mutex_init() is a macro. I assume this is why mutex_init() is defined twice as exactly the same macro. Prepare for introducing a new macro for mutex initialization by combining the two identical mutex_init() definitions into a single definition. This patch does not change any functionality because the C preprocessor expands macros when it encounters the macro name and not when a macro definition is encountered. See also commit bb630f9f7a7d ("locking/rtmutex: Add mutex variant for RT"). 
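Usage is unchanged by the consolidation; a minimal sketch (the surrounding type is illustrative):

    struct my_dev {
            struct mutex lock;
    };

    static void my_dev_init(struct my_dev *d)
    {
            /* same macro on PREEMPT_RT and !PREEMPT_RT; each call site
             * still gets its own static lock_class_key */
            mutex_init(&d->lock);
    }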
Acked-by: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240912223956.3554086-2-bvanassche@acm.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mutex.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a561c629d89f..ef617089db19 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -49,7 +49,6 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif -#ifndef CONFIG_PREEMPT_RT /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -65,6 +64,7 @@ do { \ __mutex_init((mutex), #mutex, &__key); \ } while (0) +#ifndef CONFIG_PREEMPT_RT #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ @@ -111,12 +111,6 @@ do { \ __mutex_rt_init((mutex), name, key); \ } while (0) -#define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ -} while (0) #endif /* CONFIG_PREEMPT_RT */ #ifdef CONFIG_DEBUG_MUTEXES -- cgit v1.2.3 From e837d833a13461c10f265a65ce6612e6dd43e76f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 12 Sep 2024 15:39:55 -0700 Subject: locking/mutex: Introduce mutex_init_with_key() The following pattern occurs 5 times in kernel drivers: lockdep_register_key(key); __mutex_init(mutex, name, key); In several cases the 'name' argument matches #mutex. Hence, introduce the mutex_init_with_key() macro. This macro derives the 'name' argument from the 'mutex' argument. Suggested-by: Andy Shevchenko Acked-by: Peter Zijlstra (Intel) Reviewed-by: Andy Shevchenko Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240912223956.3554086-3-bvanassche@acm.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mutex.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index ef617089db19..2bf91b57591b 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -64,6 +64,17 @@ do { \ __mutex_init((mutex), #mutex, &__key); \ } while (0) +/** + * mutex_init_with_key - initialize a mutex with a given lockdep key + * @mutex: the mutex to be initialized + * @key: the lockdep key to be associated with the mutex + * + * Initialize the mutex to the unlocked state. + * + * It is not allowed to initialize an already locked mutex. + */ +#define mutex_init_with_key(mutex, key) __mutex_init((mutex), #mutex, (key)) + #ifndef CONFIG_PREEMPT_RT #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ -- cgit v1.2.3 From 4b7ff9ab98af11a477d50f08382bcc4c2f899926 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 13 Sep 2024 10:15:56 +0200 Subject: mm, slab: restore kerneldoc for kmem_cache_create() As kmem_cache_create() became a _Generic() wrapper macro, it currently has no kerneldoc despite being the main API to use. Add it. Also adjust kmem_cache_create_usercopy() kerneldoc to indicate it is now a legacy wrapper. Also expand the kerneldoc for struct kmem_cache_args, especially for the freeptr_offset field, where important details were removed with the removal of kmem_cache_create_rcu(). 
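A short sketch of the new-style call that the restored kerneldoc describes (struct my_obj and its payload field are illustrative, not from the patch):

    static struct kmem_cache *my_obj_cache_create(void)
    {
            struct kmem_cache_args args = {
                    .align      = 64,
                    .useroffset = offsetof(struct my_obj, payload),
                    .usersize   = sizeof_field(struct my_obj, payload),
            };

            return kmem_cache_create("my_obj", sizeof(struct my_obj),
                                     &args, SLAB_HWCACHE_ALIGN);
    }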
Signed-off-by: Vlastimil Babka Reviewed-by: Christian Brauner --- include/linux/slab.h | 114 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 331412a9f4f2..6a8ab7ef3af7 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -242,19 +242,72 @@ bool slab_is_available(void); /** * struct kmem_cache_args - Less common arguments for kmem_cache_create() - * @align: The required alignment for the objects. - * @useroffset: Usercopy region offset - * @usersize: Usercopy region size - * @freeptr_offset: Custom offset for the free pointer in RCU caches - * @use_freeptr_offset: Whether a @freeptr_offset is used - * @ctor: A constructor for the objects. + * + * Any uninitialized fields of the structure are interpreted as unused. The + * exception is @freeptr_offset where %0 is a valid value, so + * @use_freeptr_offset must be also set to %true in order to interpret the field + * as used. For @useroffset %0 is also valid, but only with non-%0 + * @usersize. + * + * When %NULL args is passed to kmem_cache_create(), it is equivalent to all + * fields unused. */ struct kmem_cache_args { + /** + * @align: The required alignment for the objects. + * + * %0 means no specific alignment is requested. + */ unsigned int align; + /** + * @useroffset: Usercopy region offset. + * + * %0 is a valid offset, when @usersize is non-%0 + */ unsigned int useroffset; + /** + * @usersize: Usercopy region size. + * + * %0 means no usercopy region is specified. + */ unsigned int usersize; + /** + * @freeptr_offset: Custom offset for the free pointer + * in &SLAB_TYPESAFE_BY_RCU caches + * + * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer + * outside of the object. This might cause the object to grow in size. + * Cache creators that have a reason to avoid this can specify a custom + * free pointer offset in their struct where the free pointer will be + * placed. + * + * Note that placing the free pointer inside the object requires the + * caller to ensure that no fields are invalidated that are required to + * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for + * details). + * + * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset + * is specified, %use_freeptr_offset must be set %true. + * + * Note that @ctor currently isn't supported with custom free pointers + * as a @ctor requires an external free pointer. + */ unsigned int freeptr_offset; + /** + * @use_freeptr_offset: Whether a @freeptr_offset is used. + */ bool use_freeptr_offset; + /** + * @ctor: A constructor for the objects. + * + * The constructor is invoked for each object in a newly allocated slab + * page. It is the cache user's responsibility to free object in the + * same state as after calling the constructor, or deal appropriately + * with any differences between a freshly constructed and a reallocated + * object. + * + * %NULL means no constructor. + */ void (*ctor)(void *); }; @@ -275,30 +328,20 @@ __kmem_cache_create(const char *name, unsigned int size, unsigned int align, } /** - * kmem_cache_create_usercopy - Create a cache with a region suitable - * for copying to userspace + * kmem_cache_create_usercopy - Create a kmem cache with a region suitable + * for copying to userspace. * @name: A string which is used in /proc/slabinfo to identify this cache. * @size: The size of objects to be created in this cache. * @align: The required alignment for the objects. 
* @flags: SLAB flags * @useroffset: Usercopy region offset * @usersize: Usercopy region size - * @ctor: A constructor for the objects. - * - * Cannot be called within a interrupt, but can be interrupted. - * The @ctor is run when new pages are allocated by the cache. - * - * The flags are + * @ctor: A constructor for the objects, or %NULL. * - * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) - * to catch references to uninitialised memory. - * - * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check - * for buffer overruns. - * - * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware - * cacheline. This can be beneficial if you're counting cycles as closely - * as davem. + * This is a legacy wrapper, new code should use either KMEM_CACHE_USERCOPY() + * if whitelisting a single field is sufficient, or kmem_cache_create() with + * the necessary parameters passed via the args parameter (see + * &struct kmem_cache_args) * * Return: a pointer to the cache on success, NULL on failure. */ @@ -333,6 +376,31 @@ __kmem_cache_default_args(const char *name, unsigned int size, return __kmem_cache_create_args(name, size, &kmem_default_args, flags); } +/** + * kmem_cache_create - Create a kmem cache. + * @__name: A string which is used in /proc/slabinfo to identify this cache. + * @__object_size: The size of objects to be created in this cache. + * @__args: Optional arguments, see &struct kmem_cache_args. Passing %NULL + * means defaults will be used for all the arguments. + * + * This is currently implemented as a macro using ``_Generic()`` to call + * either the new variant of the function, or a legacy one. + * + * The new variant has 4 parameters: + * ``kmem_cache_create(name, object_size, args, flags)`` + * + * See __kmem_cache_create_args() which implements this. + * + * The legacy variant has 5 parameters: + * ``kmem_cache_create(name, object_size, align, flags, ctor)`` + * + * The align and ctor parameters map to the respective fields of + * &struct kmem_cache_args + * + * Context: Cannot be called within a interrupt, but can be interrupted. + * + * Return: a pointer to the cache on success, NULL on failure. + */ #define kmem_cache_create(__name, __object_size, __args, ...) \ _Generic((__args), \ struct kmem_cache_args *: __kmem_cache_create_args, \ -- cgit v1.2.3 From 6a36d828bdef0e02b1e6c12e2160f5b83be6aab5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 13 Sep 2024 02:09:55 +0100 Subject: driver core: attribute_container: Remove unused functions I can't find any use of 'attribute_container_add_class_device_adapter' or 'attribute_container_trigger' in git history. Their export decls went in 2006: commit 1740757e8f94 ("[PATCH] Driver Core: remove unused exports") and their docs disappeared in 2016: commit 47cb398dd75a ("Docs: sphinxify device-drivers.tmpl") Remove them. Signed-off-by: Dr. 
David Alan Gilbert Link: https://lore.kernel.org/r/20240913010955.1393995-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/attribute_container.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index e4004d1e6725..b3643de9931d 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -61,14 +61,8 @@ int attribute_container_device_trigger_safe(struct device *dev, int (*undo)(struct attribute_container *, struct device *, struct device *)); -void attribute_container_trigger(struct device *dev, - int (*fn)(struct attribute_container *, - struct device *)); int attribute_container_add_attrs(struct device *classdev); int attribute_container_add_class_device(struct device *classdev); -int attribute_container_add_class_device_adapter(struct attribute_container *cont, - struct device *dev, - struct device *classdev); void attribute_container_remove_attrs(struct device *classdev); void attribute_container_class_device_del(struct device *classdev); struct attribute_container *attribute_container_classdev_to_container(struct device *); -- cgit v1.2.3 From 7860df5b29945cfab40dd667f576af31401d7c43 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 13 Sep 2024 14:36:29 +0530 Subject: ASoC: sdw_util/intel: move soundwire endpoint and dai link structures Move Soundwire endpoint and dai link structures from Intel generic machine driver code to common place holder(soc_sdw_utils.h). These structures will be used in other platform SoundWire machine driver code. Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Link: https://patch.msgid.link/20240913090631.1834543-4-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index e366b7968c2d..e3482720a3eb 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -93,6 +93,27 @@ struct asoc_sdw_mc_private { int codec_info_list_count; }; +struct asoc_sdw_endpoint { + struct list_head list; + + u32 link_mask; + const char *codec_name; + const char *name_prefix; + bool include_sidecar; + + struct asoc_sdw_codec_info *codec_info; + const struct asoc_sdw_dai_info *dai_info; +}; + +struct asoc_sdw_dailink { + bool initialised; + + u8 group_id; + u32 link_mask[SNDRV_PCM_STREAM_LAST + 1]; + int num_devs[SNDRV_PCM_STREAM_LAST + 1]; + struct list_head endpoints; +}; + extern struct asoc_sdw_codec_info codec_info_list[]; int asoc_sdw_get_codec_info_list_count(void); -- cgit v1.2.3 From 13b24f84782d6c0373f62eb645353883d94d1dcd Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 13 Sep 2024 14:36:30 +0530 Subject: ASoC: sdw_utils/intel: move soundwire endpoint parsing helper functions Move SoundWire endpoint parsing helper functions to common place holder. These functions will be used by other platform machine driver code. 
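A hedged sketch (not part of the patch) of how a platform machine driver might call the relocated helpers; allocation and error handling are simplified, and treating a negative return as failure is an assumption:

    static int example_parse_endpoints(struct snd_soc_card *card)
    {
            struct asoc_sdw_dailink *soc_dais;
            struct asoc_sdw_endpoint *soc_ends;
            int num_devs = 0, num_ends = 0, ret;

            ret = asoc_sdw_count_sdw_endpoints(card, &num_devs, &num_ends);
            if (ret < 0)
                    return ret;

            soc_dais = devm_kcalloc(card->dev, num_devs,
                                    sizeof(*soc_dais), GFP_KERNEL);
            soc_ends = devm_kcalloc(card->dev, num_ends,
                                    sizeof(*soc_ends), GFP_KERNEL);
            if (!soc_dais || !soc_ends)
                    return -ENOMEM;

            return asoc_sdw_parse_sdw_endpoints(card, soc_dais, soc_ends,
                                                &num_devs);
    }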
Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Link: https://patch.msgid.link/20240913090631.1834543-5-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index e3482720a3eb..f68c1f193b3b 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -161,6 +161,16 @@ int asoc_sdw_init_simple_dai_link(struct device *dev, struct snd_soc_dai_link *d int (*init)(struct snd_soc_pcm_runtime *rtd), const struct snd_soc_ops *ops); +int asoc_sdw_count_sdw_endpoints(struct snd_soc_card *card, int *num_devs, int *num_ends); + +struct asoc_sdw_dailink *asoc_sdw_find_dailink(struct asoc_sdw_dailink *dailinks, + const struct snd_soc_acpi_endpoint *new); + +int asoc_sdw_parse_sdw_endpoints(struct snd_soc_card *card, + struct asoc_sdw_dailink *soc_dais, + struct asoc_sdw_endpoint *soc_ends, + int *num_devs); + int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); /* DMIC support */ -- cgit v1.2.3 From 2cb4acf2140be8a4f299c0b887cc314845ef6ec8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 13 Sep 2024 07:11:42 -0700 Subject: hwmon: Remove devm_hwmon_device_unregister() API function devm_hwmon_device_unregister() has no in-tree user, and its implementation is wrong since it does not pass the to-be-removed hardware monitoring device as parameter. I do not envision a valid use for it; drivers needing it should not have called devm_hwmon_device_register_with_info() in the first place. Remove it. Reported-by: Matthew Sanders Closes: https://lore.kernel.org/linux-hwmon/488b3bdf870ea76c4b943dbe5fd15ac8113019dc.camel@kernel.org/ Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index e94314760aab..5c6a421ad580 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -481,7 +481,6 @@ devm_hwmon_device_register_with_info(struct device *dev, const struct attribute_group **extra_groups); void hwmon_device_unregister(struct device *dev); -void devm_hwmon_device_unregister(struct device *dev); int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel); -- cgit v1.2.3 From 6fd13b282f62bd840f2410692deaa23a42fd91fa Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 27 Aug 2024 17:14:18 +0200 Subject: random: vDSO: move prototype of arch chacha function to vdso/getrandom.h Having the prototype for __arch_chacha20_blocks_nostack in arch/x86/include/asm/vdso/getrandom.h meant that the prototype and large doc comment were cloned by every architecture, which has been causing unnecessary churn. Instead move it into include/vdso/getrandom.h, where it can be shared by all archs implementing it. As a side bonus, this then lets us use that prototype in the vdso_test_chacha self test, to ensure that it matches the source, and indeed doing so turned up some inconsistencies, which are rectified here. Suggested-by: Christophe Leroy Signed-off-by: Jason A. 
Donenfeld --- include/vdso/getrandom.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/vdso/getrandom.h b/include/vdso/getrandom.h index a8b7c14b0ae0..4cf02e678f5e 100644 --- a/include/vdso/getrandom.h +++ b/include/vdso/getrandom.h @@ -43,4 +43,17 @@ struct vgetrandom_state { bool in_use; }; +/** + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. + * @key: 32-byte input key. + * @counter: 8-byte counter, read on input and updated on return. + * @nblocks: Number of blocks to generate. + * + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data + * leaking into forked child processes. + */ +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); + #endif /* _VDSO_GETRANDOM_H */ -- cgit v1.2.3 From 7f053812dab3946cb704520b72c381f605ecdf95 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 27 Aug 2024 09:31:47 +0200 Subject: random: vDSO: minimize and simplify header includes Depending on the architecture, building a 32-bit vDSO on a 64-bit kernel is problematic when some system headers are included. Minimise the amount of headers by moving needed items, such as __{get,put}_unaligned_t, into dedicated common headers and in general use more specific headers, similar to what was done in commit 8165b57bca21 ("linux/const.h: Extract common header for vDSO") and commit 8c59ab839f52 ("lib/vdso: Enable common headers"). On some architectures this results in missing PAGE_SIZE, as was described by commit 8b3843ae3634 ("vdso/datapage: Quick fix - use asm/page-def.h for ARM64"), so define this if necessary, in the same way as done prior by commit cffaefd15a8f ("vdso: Use CONFIG_PAGE_SHIFT in vdso/datapage.h"). Removing linux/time64.h leads to missing 'struct timespec64' in x86's asm/pvclock.h. Add a forward declaration of that struct in that file. Signed-off-by: Christophe Leroy Signed-off-by: Jason A. 
Donenfeld --- include/asm-generic/unaligned.h | 11 +---------- include/vdso/helpers.h | 1 + include/vdso/unaligned.h | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 include/vdso/unaligned.h (limited to 'include') diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index a84c64e5f11e..95acdd70b3b2 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -8,16 +8,7 @@ */ #include #include - -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ -}) - -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ -} while (0) +#include #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) #define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 73501149439d..3ddb03bb05cb 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ +#include #include static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) diff --git a/include/vdso/unaligned.h b/include/vdso/unaligned.h new file mode 100644 index 000000000000..eee3d2a4dbe4 --- /dev/null +++ b/include/vdso/unaligned.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_UNALIGNED_H +#define __VDSO_UNALIGNED_H + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = (val); \ +} while (0) + +#endif /* __VDSO_UNALIGNED_H */ -- cgit v1.2.3 From 4d456f0c87c95efb6197a30cd76b5b6ee5fb012e Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sun, 1 Sep 2024 14:13:10 +0800 Subject: random: vDSO: add a __vdso_getrandom prototype for all architectures Without a prototype, we'll have to add a prototype for each architecture implementing vDSO getrandom. As most architectures will likely have the vDSO getrandom implemented in a near future, and we'd like to keep the declarations compatible everywhere (to ease the libc implementor work), we should really just have one copy of the prototype. This also is what's already done inside of include/vdso/gettime.h for those vDSO functions, so this continues that convention. Suggested-by: Huacai Chen Signed-off-by: Xi Ruoyao Acked-by: Huacai Chen [Jason: rewrite docbook comment for prototype.] Signed-off-by: Jason A. Donenfeld --- include/vdso/getrandom.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/vdso/getrandom.h b/include/vdso/getrandom.h index 4cf02e678f5e..6ca4d6de9e46 100644 --- a/include/vdso/getrandom.h +++ b/include/vdso/getrandom.h @@ -56,4 +56,19 @@ struct vgetrandom_state { */ extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); +/** + * __vdso_getrandom - Architecture-specific vDSO implementation of getrandom() syscall. + * @buffer: Passed to __cvdso_getrandom(). + * @len: Passed to __cvdso_getrandom(). + * @flags: Passed to __cvdso_getrandom(). + * @opaque_state: Passed to __cvdso_getrandom(). 
+ * @opaque_len: Passed to __cvdso_getrandom(); + * + * This function is implemented by making a single call to to __cvdso_getrandom(), whose + * documentation may be consulted for more information. + * + * Returns: The return value of __cvdso_getrandom(). + */ +extern ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); + #endif /* _VDSO_GETRANDOM_H */ -- cgit v1.2.3 From d175ee98fe545d2c56df22751314584cce228307 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 2 Sep 2024 21:17:18 +0200 Subject: mm: Define VM_DROPPABLE for powerpc/32 Commit 9651fcedf7b9 ("mm: add MAP_DROPPABLE for designating always lazily freeable mappings") only adds VM_DROPPABLE for 64 bits architectures. In order to also use the getrandom vDSO implementation on powerpc/32, use VM_ARCH_1 for VM_DROPPABLE on powerpc/32. This is possible because VM_ARCH_1 is used for VM_SAO on powerpc and VM_SAO is only for powerpc/64. It is used in combination with PROT_SAO in some parts of code that are restricted to CONFIG_PPC64 through #ifdefs, it is therefore possible to define VM_SAO for CONFIG_PPC64 only. Signed-off-by: Christophe Leroy Acked-by: Michael Ellerman Signed-off-by: Jason A. Donenfeld --- include/linux/mm.h | 4 +++- include/trace/events/mmflags.h | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6549d0979b28..028847f39442 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -359,7 +359,7 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ -#elif defined(CONFIG_PPC) +#elif defined(CONFIG_PPC64) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 @@ -409,6 +409,8 @@ extern unsigned int kobjsize(const void *objp); #ifdef CONFIG_64BIT #define VM_DROPPABLE_BIT 40 #define VM_DROPPABLE BIT(VM_DROPPABLE_BIT) +#elif defined(CONFIG_PPC32) +#define VM_DROPPABLE VM_ARCH_1 #else #define VM_DROPPABLE VM_NONE #endif diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b63d211bd141..37265977d524 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -143,7 +143,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #if defined(CONFIG_X86) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } -#elif defined(CONFIG_PPC) +#elif defined(CONFIG_PPC64) #define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } #elif defined(CONFIG_PARISC) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } @@ -165,7 +165,7 @@ IF_HAVE_PG_ARCH_X(arch_3) # define IF_HAVE_UFFD_MINOR(flag, name) #endif -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, #else # define IF_HAVE_VM_DROPPABLE(flag, name) -- cgit v1.2.3 From 49e2e353fb0dbef8dced3e8e65365580349c4b14 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Thu, 12 Sep 2024 07:27:37 +0800 Subject: ASoC: tas2781: Add Calibration Kcontrols for Chromebook Add calibration related kcontrol for speaker impedance calibration and speaker leakage check for Chromebook. 
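A small sketch (not part of the patch) of how the new register constants added below plug into the existing read helper; the channel index is illustrative:

    static int example_read_runtime_re(struct tasdevice_priv *tas_priv,
                                       unsigned int *re)
    {
            return tasdevice_dev_read(tas_priv, 0 /* channel */,
                                      TAS2781_RUNTIME_RE_REG, re);
    }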
Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240911232739.1509-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index dbda552398b5..8cd6da0480b7 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -49,12 +49,59 @@ /*I2C Checksum */ #define TASDEVICE_I2CChecksum TASDEVICE_REG(0x0, 0x0, 0x7E) +/* XM_340 */ +#define TASDEVICE_XM_A1_REG TASDEVICE_REG(0x64, 0x63, 0x3c) +/* XM_341 */ +#define TASDEVICE_XM_A2_REG TASDEVICE_REG(0x64, 0x63, 0x38) + /* Volume control */ #define TAS2563_DVC_LVL TASDEVICE_REG(0x00, 0x02, 0x0C) #define TAS2781_DVC_LVL TASDEVICE_REG(0x0, 0x0, 0x1A) #define TAS2781_AMP_LEVEL TASDEVICE_REG(0x0, 0x0, 0x03) #define TAS2781_AMP_LEVEL_MASK GENMASK(5, 1) +#define TAS2563_IDLE TASDEVICE_REG(0x00, 0x00, 0x3e) +#define TAS2563_PRM_R0_REG TASDEVICE_REG(0x00, 0x0f, 0x34) + +#define TAS2563_RUNTIME_RE_REG_TF TASDEVICE_REG(0x64, 0x02, 0x70) +#define TAS2563_RUNTIME_RE_REG TASDEVICE_REG(0x64, 0x02, 0x48) + +#define TAS2563_PRM_ENFF_REG TASDEVICE_REG(0x00, 0x0d, 0x54) +#define TAS2563_PRM_DISTCK_REG TASDEVICE_REG(0x00, 0x0d, 0x58) +#define TAS2563_PRM_TE_SCTHR_REG TASDEVICE_REG(0x00, 0x0f, 0x60) +#define TAS2563_PRM_PLT_FLAG_REG TASDEVICE_REG(0x00, 0x0d, 0x74) +#define TAS2563_PRM_SINEGAIN_REG TASDEVICE_REG(0x00, 0x0d, 0x7c) +/* prm_Int_B0 */ +#define TAS2563_TE_TA1_REG TASDEVICE_REG(0x00, 0x10, 0x0c) +/* prm_Int_A1 */ +#define TAS2563_TE_TA1_AT_REG TASDEVICE_REG(0x00, 0x10, 0x10) +/* prm_TE_Beta */ +#define TAS2563_TE_TA2_REG TASDEVICE_REG(0x00, 0x0f, 0x64) +/* prm_TE_Beta1 */ +#define TAS2563_TE_AT_REG TASDEVICE_REG(0x00, 0x0f, 0x68) +/* prm_TE_1_Beta1 */ +#define TAS2563_TE_DT_REG TASDEVICE_REG(0x00, 0x0f, 0x70) + +#define TAS2781_PRM_INT_MASK_REG TASDEVICE_REG(0x00, 0x00, 0x3b) +#define TAS2781_PRM_CLK_CFG_REG TASDEVICE_REG(0x00, 0x00, 0x5c) +#define TAS2781_PRM_RSVD_REG TASDEVICE_REG(0x00, 0x01, 0x19) +#define TAS2781_PRM_TEST_57_REG TASDEVICE_REG(0x00, 0xfd, 0x39) +#define TAS2781_PRM_TEST_62_REG TASDEVICE_REG(0x00, 0xfd, 0x3e) +#define TAS2781_PRM_PVDD_UVLO_REG TASDEVICE_REG(0x00, 0x00, 0x71) +#define TAS2781_PRM_CHNL_0_REG TASDEVICE_REG(0x00, 0x00, 0x03) +#define TAS2781_PRM_NG_CFG0_REG TASDEVICE_REG(0x00, 0x00, 0x35) +#define TAS2781_PRM_IDLE_CH_DET_REG TASDEVICE_REG(0x00, 0x00, 0x66) +#define TAS2781_PRM_PLT_FLAG_REG TASDEVICE_REG(0x00, 0x14, 0x38) +#define TAS2781_PRM_SINEGAIN_REG TASDEVICE_REG(0x00, 0x14, 0x40) +#define TAS2781_PRM_SINEGAIN2_REG TASDEVICE_REG(0x00, 0x14, 0x44) + +#define TAS2781_TEST_UNLOCK_REG TASDEVICE_REG(0x00, 0xFD, 0x0D) +#define TAS2781_TEST_PAGE_UNLOCK 0x0D + +#define TAS2781_RUNTIME_LATCH_RE_REG TASDEVICE_REG(0x00, 0x00, 0x49) +#define TAS2781_RUNTIME_RE_REG_TF TASDEVICE_REG(0x64, 0x62, 0x48) +#define TAS2781_RUNTIME_RE_REG TASDEVICE_REG(0x64, 0x63, 0x44) + #define TASDEVICE_CMD_SING_W 0x1 #define TASDEVICE_CMD_BURST 0x2 #define TASDEVICE_CMD_DELAY 0x3 @@ -70,7 +117,15 @@ enum device_catlog_id { OTHERS }; +struct bulk_reg_val { + int reg; + unsigned char val[4]; + unsigned char val_len; + bool is_locked; +}; + struct tasdevice { + struct bulk_reg_val *cali_data_backup; struct tasdevice_fw *cali_data_fmw; unsigned int dev_addr; unsigned int err_code; @@ -81,9 +136,19 @@ struct tasdevice { bool is_loaderr; }; +struct cali_reg { + unsigned int r0_reg; + unsigned int r0_low_reg; + unsigned int invr0_reg; + unsigned int 
pow_reg; + unsigned int tlimit_reg; +}; + struct calidata { unsigned char *data; unsigned long total_sz; + struct cali_reg cali_reg_array; + unsigned int cali_dat_sz_per_dev; }; struct tasdevice_priv { @@ -119,6 +184,7 @@ struct tasdevice_priv { bool force_fwload_status; bool playback_started; bool isacpi; + bool is_user_space_calidata; unsigned int global_addr; int (*fw_parse_variable_header)(struct tasdevice_priv *tas_priv, @@ -145,6 +211,8 @@ int tasdevice_init(struct tasdevice_priv *tas_priv); void tasdevice_remove(struct tasdevice_priv *tas_priv); int tasdevice_save_calibration(struct tasdevice_priv *tas_priv); void tasdevice_apply_calibration(struct tasdevice_priv *tas_priv); +int tasdev_chn_switch(struct tasdevice_priv *tas_priv, + unsigned short chn); int tasdevice_dev_read(struct tasdevice_priv *tas_priv, unsigned short chn, unsigned int reg, unsigned int *value); int tasdevice_dev_write(struct tasdevice_priv *tas_priv, -- cgit v1.2.3 From 2b018086143d638de8d67ae5be6e8c1afb413193 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 13 Sep 2024 11:28:46 -0700 Subject: blk-mq: unconditional nr_integrity_segments Always defining the field will make using it easier and less error prone in future patches. There shouldn't be any downside to this: the field fits in what would otherwise be a 2-byte hole, so we're not saving space by conditionally leaving it out. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20240913182854.2445457-2-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8d304b1d16b1..4fecf46ef681 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -149,10 +149,7 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; - -#ifdef CONFIG_BLK_DEV_INTEGRITY unsigned short nr_integrity_segments; -#endif #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct bio_crypt_ctx *crypt_ctx; -- cgit v1.2.3 From d2c5b1faccd5ef6352456f817e941945d3b3fe62 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 13 Sep 2024 11:28:50 -0700 Subject: block: provide a request helper for user integrity segments Provide a helper to keep the request flags and nr_integrity_segments in sync with the bio's integrity payload. This is an integrity equivalent to the normal data helper function, 'blk_rq_map_user()'. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20240913182854.2445457-6-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index de98049b7ded..793dbb1e0672 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -28,6 +28,8 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, + ssize_t bytes, u32 seed); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) @@ -102,6 +104,13 @@ static inline int blk_rq_map_integrity_sg(struct request_queue *q, { return 0; } +static inline int blk_rq_integrity_map_user(struct request *rq, + void __user *ubuf, + ssize_t bytes, + u32 seed) +{ + return -EINVAL; +} static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) { return NULL; -- cgit v1.2.3 From 58b3ac2447c22189cd3a26ac1babb2dae13fd514 Mon Sep 17 00:00:00 2001 From: Nikunj Kela Date: Tue, 10 Sep 2024 09:26:37 -0700 Subject: dt-bindings: interrupt-controller: arm,gic: add ESPI and EPPI specifiers Extended SPI and extended PPI interrupts are in the range [0-1023] and [0-127] respectively, supported by GICv3.1. Qualcomm SA8255p platform uses extended SPI for SCMI 'a2p' doorbells. Signed-off-by: Nikunj Kela Link: https://lore.kernel.org/r/20240910162637.2382656-1-quic_nkela@quicinc.com Signed-off-by: Rob Herring (Arm) --- include/dt-bindings/interrupt-controller/arm-gic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/interrupt-controller/arm-gic.h b/include/dt-bindings/interrupt-controller/arm-gic.h index 35b6f69b7db6..887f53363e8a 100644 --- a/include/dt-bindings/interrupt-controller/arm-gic.h +++ b/include/dt-bindings/interrupt-controller/arm-gic.h @@ -12,6 +12,8 @@ #define GIC_SPI 0 #define GIC_PPI 1 +#define GIC_ESPI 2 +#define GIC_EPPI 3 /* * Interrupt specifier cell 2. -- cgit v1.2.3 From 76c313f658d2752e8527610677164aa7094ef7a5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 13 Sep 2024 12:17:46 -0700 Subject: blk-integrity: improved sg segment mapping Make the integrity mapping more like data mapping, blk_rq_map_sg. Use the request to validate the segment count, and update the callers so they don't have to. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20240913191746.2628196-1-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 793dbb1e0672..676f8f860c47 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -25,8 +25,7 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, } #ifdef CONFIG_BLK_DEV_INTEGRITY -int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, - struct scatterlist *); +int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes, u32 seed); @@ -98,8 +97,7 @@ static inline int blk_rq_count_integrity_sg(struct request_queue *q, { return 0; } -static inline int blk_rq_map_integrity_sg(struct request_queue *q, - struct bio *b, +static inline int blk_rq_map_integrity_sg(struct request *q, struct scatterlist *s) { return 0; -- cgit v1.2.3 From 32556ce93bc45c730829083cb60f95a2728ea48b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:48 +0200 Subject: bpf: Fix helper writes to read-only maps Lonial found an issue that despite user- and BPF-side frozen BPF map (like in case of .rodata), it was still possible to write into it from a BPF program side through specific helpers having ARG_PTR_TO_{LONG,INT} as arguments. In check_func_arg() when the argument is as mentioned, the meta->raw_mode is never set. Later, check_helper_mem_access(), under the case of PTR_TO_MAP_VALUE as register base type, it assumes BPF_READ for the subsequent call to check_map_access_type() and given the BPF map is read-only it succeeds. The helpers really need to be annotated as ARG_PTR_TO_{LONG,INT} | MEM_UNINIT when results are written into them as opposed to read out of them. The latter indicates that it's okay to pass a pointer to uninitialized memory as the memory is written to anyway. However, ARG_PTR_TO_{LONG,INT} is a special case of ARG_PTR_TO_FIXED_SIZE_MEM just with additional alignment requirement. So it is better to just get rid of the ARG_PTR_TO_{LONG,INT} special cases altogether and reuse the fixed size memory types. For this, add MEM_ALIGNED to additionally ensure alignment given these helpers write directly into the args via * = val. The .arg*_size has been initialized reflecting the actual sizeof(*). MEM_ALIGNED can only be used in combination with MEM_FIXED_SIZE annotated argument types, since in !MEM_FIXED_SIZE cases the verifier does not know the buffer size a priori and therefore cannot blindly write * = val. 
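As a rough illustration of the new annotation scheme (a hypothetical helper, not one converted by this series; bpf_example() and its proto exist only for this sketch), a helper that fills a 64-bit result through its pointer argument would now be declared along these lines:

	#include <linux/bpf.h>
	#include <linux/filter.h>

	BPF_CALL_1(bpf_example, long *, res)
	{
		*res = 42;	/* the helper writes through the pointer */
		return 0;
	}

	static const struct bpf_func_proto bpf_example_proto = {
		.func		= bpf_example,
		.ret_type	= RET_INTEGER,
		/* was ARG_PTR_TO_LONG; now a fixed-size, uninitialized, aligned buffer */
		.arg1_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
		.arg1_size	= sizeof(long),
	};

Because MEM_UNINIT marks the access as a write, the verifier can now reject a read-only map value passed as such an argument.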
Fixes: 57c3bb725a3d ("bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types") Reported-by: Lonial Con Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20240913191754.13290-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7f3e8477d5e7..0c3893c47171 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -695,6 +695,11 @@ enum bpf_type_flag { /* DYNPTR points to xdp_buff */ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), + /* Memory must be aligned on some architectures, used in combination with + * MEM_FIXED_SIZE. + */ + MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -732,8 +737,6 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ -- cgit v1.2.3 From 0cca961a026177af69044f10d6ae76d8ce043764 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 12 Sep 2024 11:00:25 +0530 Subject: PCI: Pass domain number to pci_bus_release_domain_nr() explicitly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pci_bus_release_domain_nr() API is supposed to free the domain number allocated by pci_bus_find_domain_nr(). Most of the callers of pci_bus_find_domain_nr() store the domain number in pci_bus::domain_nr. As such, pci_bus_release_domain_nr() implicitly frees the domain number by dereferencing 'struct pci_bus'. However, one of the callers of this API, the PCI endpoint subsystem, doesn't have 'struct pci_bus', so it only passes NULL. Due to this, the API will end up dereferencing the NULL pointer. To fix this issue, pass the domain number to this API explicitly. Since 'struct pci_bus' is not used for anything other than extracting the domain number, it makes sense to pass the domain number directly. 
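For illustration only (the function and variable names below are invented for this note and are not part of the patch), a caller that has no struct pci_bus, such as the endpoint subsystem, now hands over the number it allocated earlier instead of a NULL bus pointer:

	/* Sketch: release a domain number obtained from pci_bus_find_domain_nr(). */
	static void example_release_domain(struct device *parent, int domain_nr)
	{
		/* Only dynamically allocated domain numbers need to be returned. */
		if (domain_nr > 0)
			pci_bus_release_domain_nr(parent, domain_nr);
	}

Bus-based callers simply pass bus->domain_nr (and a suitable parent device) themselves.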
Fixes: 0328947c5032 ("PCI: endpoint: Assign PCI domain number for endpoint controllers") Closes: https://lore.kernel.org/linux-pci/c0c40ddb-bf64-4b22-9dd1-8dbb18aa2813@stanley.mountain Link: https://lore.kernel.org/linux-pci/20240912053025.25314-1-manivannan.sadhasivam@linaro.org Reported-by: Dan Carpenter Signed-off-by: Manivannan Sadhasivam [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..37d97bef060f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1884,7 +1884,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) { return 0; } #endif int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent); -void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent); +void pci_bus_release_domain_nr(struct device *parent, int domain_nr); #endif /* Some architectures require additional setup to direct VGA traffic */ -- cgit v1.2.3 From c951a29f6ba52b86223eb00bbcff43142d59a901 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 11 Sep 2024 12:37:43 +0300 Subject: net: fib_rules: Add DSCP selector attribute The FIB rule TOS selector is implemented differently between IPv4 and IPv6. In IPv4 it is used to match on the three "Type of Services" bits specified in RFC 791, while in IPv6 it is used to match on the six DSCP bits specified in RFC 2474. Add a new FIB rule attribute to allow matching on DSCP. The attribute will be used to implement a 'dscp' selector in ip-rule with a consistent behavior between IPv4 and IPv6. For now, set the type of the attribute to 'NLA_REJECT' so that user space will not be able to configure it. This restriction will be lifted once both IPv4 and IPv6 support the new attribute. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/20240911093748.3662015-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/fib_rules.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 232df14e1287..a6924dd3aff1 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -67,6 +67,7 @@ enum { FRA_IP_PROTO, /* ip proto */ FRA_SPORT_RANGE, /* sport */ FRA_DPORT_RANGE, /* dport */ + FRA_DSCP, /* dscp */ __FRA_MAX }; -- cgit v1.2.3 From 1b8c9cb3151a541a4197d2bb449233064f747832 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 12 Sep 2024 09:19:49 +0800 Subject: MIPS: Remove the obsoleted code for include/linux/mv643xx.h Most of the drivers which used this header have been deleted, and most of this code is obsolete, so move the only defines that are actually used into arch/powerpc/platforms/chrp/pegasos_eth.c and delete the file completely. Signed-off-by: Gaosheng Cui Link: https://patch.msgid.link/20240912011949.2726928-1-cuigaosheng1@huawei.com Signed-off-by: Jakub Kicinski --- include/linux/mv643xx.h | 921 ------------------------------------------------ 1 file changed, 921 deletions(-) delete mode 100644 include/linux/mv643xx.h (limited to 'include') diff --git a/include/linux/mv643xx.h b/include/linux/mv643xx.h deleted file mode 100644 index 000b126acfb6..000000000000 --- a/include/linux/mv643xx.h +++ /dev/null @@ -1,921 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mv643xx.h - MV-643XX Internal registers definition file. 
- * - * Copyright 2002 Momentum Computer, Inc. - * Author: Matthew Dharm - * Copyright 2002 GALILEO TECHNOLOGY, LTD. - */ -#ifndef __ASM_MV643XX_H -#define __ASM_MV643XX_H - -#include -#include -#include - -/****************************************/ -/* Processor Address Space */ -/****************************************/ - -/* DDR SDRAM BAR and size registers */ - -#define MV64340_CS_0_BASE_ADDR 0x008 -#define MV64340_CS_0_SIZE 0x010 -#define MV64340_CS_1_BASE_ADDR 0x208 -#define MV64340_CS_1_SIZE 0x210 -#define MV64340_CS_2_BASE_ADDR 0x018 -#define MV64340_CS_2_SIZE 0x020 -#define MV64340_CS_3_BASE_ADDR 0x218 -#define MV64340_CS_3_SIZE 0x220 - -/* Devices BAR and size registers */ - -#define MV64340_DEV_CS0_BASE_ADDR 0x028 -#define MV64340_DEV_CS0_SIZE 0x030 -#define MV64340_DEV_CS1_BASE_ADDR 0x228 -#define MV64340_DEV_CS1_SIZE 0x230 -#define MV64340_DEV_CS2_BASE_ADDR 0x248 -#define MV64340_DEV_CS2_SIZE 0x250 -#define MV64340_DEV_CS3_BASE_ADDR 0x038 -#define MV64340_DEV_CS3_SIZE 0x040 -#define MV64340_BOOTCS_BASE_ADDR 0x238 -#define MV64340_BOOTCS_SIZE 0x240 - -/* PCI 0 BAR and size registers */ - -#define MV64340_PCI_0_IO_BASE_ADDR 0x048 -#define MV64340_PCI_0_IO_SIZE 0x050 -#define MV64340_PCI_0_MEMORY0_BASE_ADDR 0x058 -#define MV64340_PCI_0_MEMORY0_SIZE 0x060 -#define MV64340_PCI_0_MEMORY1_BASE_ADDR 0x080 -#define MV64340_PCI_0_MEMORY1_SIZE 0x088 -#define MV64340_PCI_0_MEMORY2_BASE_ADDR 0x258 -#define MV64340_PCI_0_MEMORY2_SIZE 0x260 -#define MV64340_PCI_0_MEMORY3_BASE_ADDR 0x280 -#define MV64340_PCI_0_MEMORY3_SIZE 0x288 - -/* PCI 1 BAR and size registers */ -#define MV64340_PCI_1_IO_BASE_ADDR 0x090 -#define MV64340_PCI_1_IO_SIZE 0x098 -#define MV64340_PCI_1_MEMORY0_BASE_ADDR 0x0a0 -#define MV64340_PCI_1_MEMORY0_SIZE 0x0a8 -#define MV64340_PCI_1_MEMORY1_BASE_ADDR 0x0b0 -#define MV64340_PCI_1_MEMORY1_SIZE 0x0b8 -#define MV64340_PCI_1_MEMORY2_BASE_ADDR 0x2a0 -#define MV64340_PCI_1_MEMORY2_SIZE 0x2a8 -#define MV64340_PCI_1_MEMORY3_BASE_ADDR 0x2b0 -#define MV64340_PCI_1_MEMORY3_SIZE 0x2b8 - -/* SRAM base address */ -#define MV64340_INTEGRATED_SRAM_BASE_ADDR 0x268 - -/* internal registers space base address */ -#define MV64340_INTERNAL_SPACE_BASE_ADDR 0x068 - -/* Enables the CS , DEV_CS , PCI 0 and PCI 1 - windows above */ -#define MV64340_BASE_ADDR_ENABLE 0x278 - -/****************************************/ -/* PCI remap registers */ -/****************************************/ - /* PCI 0 */ -#define MV64340_PCI_0_IO_ADDR_REMAP 0x0f0 -#define MV64340_PCI_0_MEMORY0_LOW_ADDR_REMAP 0x0f8 -#define MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP 0x320 -#define MV64340_PCI_0_MEMORY1_LOW_ADDR_REMAP 0x100 -#define MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP 0x328 -#define MV64340_PCI_0_MEMORY2_LOW_ADDR_REMAP 0x2f8 -#define MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP 0x330 -#define MV64340_PCI_0_MEMORY3_LOW_ADDR_REMAP 0x300 -#define MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP 0x338 - /* PCI 1 */ -#define MV64340_PCI_1_IO_ADDR_REMAP 0x108 -#define MV64340_PCI_1_MEMORY0_LOW_ADDR_REMAP 0x110 -#define MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP 0x340 -#define MV64340_PCI_1_MEMORY1_LOW_ADDR_REMAP 0x118 -#define MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP 0x348 -#define MV64340_PCI_1_MEMORY2_LOW_ADDR_REMAP 0x310 -#define MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP 0x350 -#define MV64340_PCI_1_MEMORY3_LOW_ADDR_REMAP 0x318 -#define MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP 0x358 - -#define MV64340_CPU_PCI_0_HEADERS_RETARGET_CONTROL 0x3b0 -#define MV64340_CPU_PCI_0_HEADERS_RETARGET_BASE 0x3b8 -#define MV64340_CPU_PCI_1_HEADERS_RETARGET_CONTROL 0x3c0 
-#define MV64340_CPU_PCI_1_HEADERS_RETARGET_BASE 0x3c8 -#define MV64340_CPU_GE_HEADERS_RETARGET_CONTROL 0x3d0 -#define MV64340_CPU_GE_HEADERS_RETARGET_BASE 0x3d8 -#define MV64340_CPU_IDMA_HEADERS_RETARGET_CONTROL 0x3e0 -#define MV64340_CPU_IDMA_HEADERS_RETARGET_BASE 0x3e8 - -/****************************************/ -/* CPU Control Registers */ -/****************************************/ - -#define MV64340_CPU_CONFIG 0x000 -#define MV64340_CPU_MODE 0x120 -#define MV64340_CPU_MASTER_CONTROL 0x160 -#define MV64340_CPU_CROSS_BAR_CONTROL_LOW 0x150 -#define MV64340_CPU_CROSS_BAR_CONTROL_HIGH 0x158 -#define MV64340_CPU_CROSS_BAR_TIMEOUT 0x168 - -/****************************************/ -/* SMP RegisterS */ -/****************************************/ - -#define MV64340_SMP_WHO_AM_I 0x200 -#define MV64340_SMP_CPU0_DOORBELL 0x214 -#define MV64340_SMP_CPU0_DOORBELL_CLEAR 0x21C -#define MV64340_SMP_CPU1_DOORBELL 0x224 -#define MV64340_SMP_CPU1_DOORBELL_CLEAR 0x22C -#define MV64340_SMP_CPU0_DOORBELL_MASK 0x234 -#define MV64340_SMP_CPU1_DOORBELL_MASK 0x23C -#define MV64340_SMP_SEMAPHOR0 0x244 -#define MV64340_SMP_SEMAPHOR1 0x24c -#define MV64340_SMP_SEMAPHOR2 0x254 -#define MV64340_SMP_SEMAPHOR3 0x25c -#define MV64340_SMP_SEMAPHOR4 0x264 -#define MV64340_SMP_SEMAPHOR5 0x26c -#define MV64340_SMP_SEMAPHOR6 0x274 -#define MV64340_SMP_SEMAPHOR7 0x27c - -/****************************************/ -/* CPU Sync Barrier Register */ -/****************************************/ - -#define MV64340_CPU_0_SYNC_BARRIER_TRIGGER 0x0c0 -#define MV64340_CPU_0_SYNC_BARRIER_VIRTUAL 0x0c8 -#define MV64340_CPU_1_SYNC_BARRIER_TRIGGER 0x0d0 -#define MV64340_CPU_1_SYNC_BARRIER_VIRTUAL 0x0d8 - -/****************************************/ -/* CPU Access Protect */ -/****************************************/ - -#define MV64340_CPU_PROTECT_WINDOW_0_BASE_ADDR 0x180 -#define MV64340_CPU_PROTECT_WINDOW_0_SIZE 0x188 -#define MV64340_CPU_PROTECT_WINDOW_1_BASE_ADDR 0x190 -#define MV64340_CPU_PROTECT_WINDOW_1_SIZE 0x198 -#define MV64340_CPU_PROTECT_WINDOW_2_BASE_ADDR 0x1a0 -#define MV64340_CPU_PROTECT_WINDOW_2_SIZE 0x1a8 -#define MV64340_CPU_PROTECT_WINDOW_3_BASE_ADDR 0x1b0 -#define MV64340_CPU_PROTECT_WINDOW_3_SIZE 0x1b8 - - -/****************************************/ -/* CPU Error Report */ -/****************************************/ - -#define MV64340_CPU_ERROR_ADDR_LOW 0x070 -#define MV64340_CPU_ERROR_ADDR_HIGH 0x078 -#define MV64340_CPU_ERROR_DATA_LOW 0x128 -#define MV64340_CPU_ERROR_DATA_HIGH 0x130 -#define MV64340_CPU_ERROR_PARITY 0x138 -#define MV64340_CPU_ERROR_CAUSE 0x140 -#define MV64340_CPU_ERROR_MASK 0x148 - -/****************************************/ -/* CPU Interface Debug Registers */ -/****************************************/ - -#define MV64340_PUNIT_SLAVE_DEBUG_LOW 0x360 -#define MV64340_PUNIT_SLAVE_DEBUG_HIGH 0x368 -#define MV64340_PUNIT_MASTER_DEBUG_LOW 0x370 -#define MV64340_PUNIT_MASTER_DEBUG_HIGH 0x378 -#define MV64340_PUNIT_MMASK 0x3e4 - -/****************************************/ -/* Integrated SRAM Registers */ -/****************************************/ - -#define MV64340_SRAM_CONFIG 0x380 -#define MV64340_SRAM_TEST_MODE 0X3F4 -#define MV64340_SRAM_ERROR_CAUSE 0x388 -#define MV64340_SRAM_ERROR_ADDR 0x390 -#define MV64340_SRAM_ERROR_ADDR_HIGH 0X3F8 -#define MV64340_SRAM_ERROR_DATA_LOW 0x398 -#define MV64340_SRAM_ERROR_DATA_HIGH 0x3a0 -#define MV64340_SRAM_ERROR_DATA_PARITY 0x3a8 - -/****************************************/ -/* SDRAM Configuration */ -/****************************************/ - -#define 
MV64340_SDRAM_CONFIG 0x1400 -#define MV64340_D_UNIT_CONTROL_LOW 0x1404 -#define MV64340_D_UNIT_CONTROL_HIGH 0x1424 -#define MV64340_SDRAM_TIMING_CONTROL_LOW 0x1408 -#define MV64340_SDRAM_TIMING_CONTROL_HIGH 0x140c -#define MV64340_SDRAM_ADDR_CONTROL 0x1410 -#define MV64340_SDRAM_OPEN_PAGES_CONTROL 0x1414 -#define MV64340_SDRAM_OPERATION 0x1418 -#define MV64340_SDRAM_MODE 0x141c -#define MV64340_EXTENDED_DRAM_MODE 0x1420 -#define MV64340_SDRAM_CROSS_BAR_CONTROL_LOW 0x1430 -#define MV64340_SDRAM_CROSS_BAR_CONTROL_HIGH 0x1434 -#define MV64340_SDRAM_CROSS_BAR_TIMEOUT 0x1438 -#define MV64340_SDRAM_ADDR_CTRL_PADS_CALIBRATION 0x14c0 -#define MV64340_SDRAM_DATA_PADS_CALIBRATION 0x14c4 - -/****************************************/ -/* SDRAM Error Report */ -/****************************************/ - -#define MV64340_SDRAM_ERROR_DATA_LOW 0x1444 -#define MV64340_SDRAM_ERROR_DATA_HIGH 0x1440 -#define MV64340_SDRAM_ERROR_ADDR 0x1450 -#define MV64340_SDRAM_RECEIVED_ECC 0x1448 -#define MV64340_SDRAM_CALCULATED_ECC 0x144c -#define MV64340_SDRAM_ECC_CONTROL 0x1454 -#define MV64340_SDRAM_ECC_ERROR_COUNTER 0x1458 - -/******************************************/ -/* Controlled Delay Line (CDL) Registers */ -/******************************************/ - -#define MV64340_DFCDL_CONFIG0 0x1480 -#define MV64340_DFCDL_CONFIG1 0x1484 -#define MV64340_DLL_WRITE 0x1488 -#define MV64340_DLL_READ 0x148c -#define MV64340_SRAM_ADDR 0x1490 -#define MV64340_SRAM_DATA0 0x1494 -#define MV64340_SRAM_DATA1 0x1498 -#define MV64340_SRAM_DATA2 0x149c -#define MV64340_DFCL_PROBE 0x14a0 - -/******************************************/ -/* Debug Registers */ -/******************************************/ - -#define MV64340_DUNIT_DEBUG_LOW 0x1460 -#define MV64340_DUNIT_DEBUG_HIGH 0x1464 -#define MV64340_DUNIT_MMASK 0X1b40 - -/****************************************/ -/* Device Parameters */ -/****************************************/ - -#define MV64340_DEVICE_BANK0_PARAMETERS 0x45c -#define MV64340_DEVICE_BANK1_PARAMETERS 0x460 -#define MV64340_DEVICE_BANK2_PARAMETERS 0x464 -#define MV64340_DEVICE_BANK3_PARAMETERS 0x468 -#define MV64340_DEVICE_BOOT_BANK_PARAMETERS 0x46c -#define MV64340_DEVICE_INTERFACE_CONTROL 0x4c0 -#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_LOW 0x4c8 -#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_HIGH 0x4cc -#define MV64340_DEVICE_INTERFACE_CROSS_BAR_TIMEOUT 0x4c4 - -/****************************************/ -/* Device interrupt registers */ -/****************************************/ - -#define MV64340_DEVICE_INTERRUPT_CAUSE 0x4d0 -#define MV64340_DEVICE_INTERRUPT_MASK 0x4d4 -#define MV64340_DEVICE_ERROR_ADDR 0x4d8 -#define MV64340_DEVICE_ERROR_DATA 0x4dc -#define MV64340_DEVICE_ERROR_PARITY 0x4e0 - -/****************************************/ -/* Device debug registers */ -/****************************************/ - -#define MV64340_DEVICE_DEBUG_LOW 0x4e4 -#define MV64340_DEVICE_DEBUG_HIGH 0x4e8 -#define MV64340_RUNIT_MMASK 0x4f0 - -/****************************************/ -/* PCI Slave Address Decoding registers */ -/****************************************/ - -#define MV64340_PCI_0_CS_0_BANK_SIZE 0xc08 -#define MV64340_PCI_1_CS_0_BANK_SIZE 0xc88 -#define MV64340_PCI_0_CS_1_BANK_SIZE 0xd08 -#define MV64340_PCI_1_CS_1_BANK_SIZE 0xd88 -#define MV64340_PCI_0_CS_2_BANK_SIZE 0xc0c -#define MV64340_PCI_1_CS_2_BANK_SIZE 0xc8c -#define MV64340_PCI_0_CS_3_BANK_SIZE 0xd0c -#define MV64340_PCI_1_CS_3_BANK_SIZE 0xd8c -#define MV64340_PCI_0_DEVCS_0_BANK_SIZE 0xc10 -#define MV64340_PCI_1_DEVCS_0_BANK_SIZE 
0xc90 -#define MV64340_PCI_0_DEVCS_1_BANK_SIZE 0xd10 -#define MV64340_PCI_1_DEVCS_1_BANK_SIZE 0xd90 -#define MV64340_PCI_0_DEVCS_2_BANK_SIZE 0xd18 -#define MV64340_PCI_1_DEVCS_2_BANK_SIZE 0xd98 -#define MV64340_PCI_0_DEVCS_3_BANK_SIZE 0xc14 -#define MV64340_PCI_1_DEVCS_3_BANK_SIZE 0xc94 -#define MV64340_PCI_0_DEVCS_BOOT_BANK_SIZE 0xd14 -#define MV64340_PCI_1_DEVCS_BOOT_BANK_SIZE 0xd94 -#define MV64340_PCI_0_P2P_MEM0_BAR_SIZE 0xd1c -#define MV64340_PCI_1_P2P_MEM0_BAR_SIZE 0xd9c -#define MV64340_PCI_0_P2P_MEM1_BAR_SIZE 0xd20 -#define MV64340_PCI_1_P2P_MEM1_BAR_SIZE 0xda0 -#define MV64340_PCI_0_P2P_I_O_BAR_SIZE 0xd24 -#define MV64340_PCI_1_P2P_I_O_BAR_SIZE 0xda4 -#define MV64340_PCI_0_CPU_BAR_SIZE 0xd28 -#define MV64340_PCI_1_CPU_BAR_SIZE 0xda8 -#define MV64340_PCI_0_INTERNAL_SRAM_BAR_SIZE 0xe00 -#define MV64340_PCI_1_INTERNAL_SRAM_BAR_SIZE 0xe80 -#define MV64340_PCI_0_EXPANSION_ROM_BAR_SIZE 0xd2c -#define MV64340_PCI_1_EXPANSION_ROM_BAR_SIZE 0xd9c -#define MV64340_PCI_0_BASE_ADDR_REG_ENABLE 0xc3c -#define MV64340_PCI_1_BASE_ADDR_REG_ENABLE 0xcbc -#define MV64340_PCI_0_CS_0_BASE_ADDR_REMAP 0xc48 -#define MV64340_PCI_1_CS_0_BASE_ADDR_REMAP 0xcc8 -#define MV64340_PCI_0_CS_1_BASE_ADDR_REMAP 0xd48 -#define MV64340_PCI_1_CS_1_BASE_ADDR_REMAP 0xdc8 -#define MV64340_PCI_0_CS_2_BASE_ADDR_REMAP 0xc4c -#define MV64340_PCI_1_CS_2_BASE_ADDR_REMAP 0xccc -#define MV64340_PCI_0_CS_3_BASE_ADDR_REMAP 0xd4c -#define MV64340_PCI_1_CS_3_BASE_ADDR_REMAP 0xdcc -#define MV64340_PCI_0_CS_0_BASE_HIGH_ADDR_REMAP 0xF04 -#define MV64340_PCI_1_CS_0_BASE_HIGH_ADDR_REMAP 0xF84 -#define MV64340_PCI_0_CS_1_BASE_HIGH_ADDR_REMAP 0xF08 -#define MV64340_PCI_1_CS_1_BASE_HIGH_ADDR_REMAP 0xF88 -#define MV64340_PCI_0_CS_2_BASE_HIGH_ADDR_REMAP 0xF0C -#define MV64340_PCI_1_CS_2_BASE_HIGH_ADDR_REMAP 0xF8C -#define MV64340_PCI_0_CS_3_BASE_HIGH_ADDR_REMAP 0xF10 -#define MV64340_PCI_1_CS_3_BASE_HIGH_ADDR_REMAP 0xF90 -#define MV64340_PCI_0_DEVCS_0_BASE_ADDR_REMAP 0xc50 -#define MV64340_PCI_1_DEVCS_0_BASE_ADDR_REMAP 0xcd0 -#define MV64340_PCI_0_DEVCS_1_BASE_ADDR_REMAP 0xd50 -#define MV64340_PCI_1_DEVCS_1_BASE_ADDR_REMAP 0xdd0 -#define MV64340_PCI_0_DEVCS_2_BASE_ADDR_REMAP 0xd58 -#define MV64340_PCI_1_DEVCS_2_BASE_ADDR_REMAP 0xdd8 -#define MV64340_PCI_0_DEVCS_3_BASE_ADDR_REMAP 0xc54 -#define MV64340_PCI_1_DEVCS_3_BASE_ADDR_REMAP 0xcd4 -#define MV64340_PCI_0_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xd54 -#define MV64340_PCI_1_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xdd4 -#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xd5c -#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xddc -#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xd60 -#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xde0 -#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xd64 -#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xde4 -#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xd68 -#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xde8 -#define MV64340_PCI_0_P2P_I_O_BASE_ADDR_REMAP 0xd6c -#define MV64340_PCI_1_P2P_I_O_BASE_ADDR_REMAP 0xdec -#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_LOW 0xd70 -#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_LOW 0xdf0 -#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_HIGH 0xd74 -#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_HIGH 0xdf4 -#define MV64340_PCI_0_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf00 -#define MV64340_PCI_1_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf80 -#define MV64340_PCI_0_EXPANSION_ROM_BASE_ADDR_REMAP 0xf38 -#define MV64340_PCI_1_EXPANSION_ROM_BASE_ADDR_REMAP 0xfb8 -#define MV64340_PCI_0_ADDR_DECODE_CONTROL 0xd3c -#define 
MV64340_PCI_1_ADDR_DECODE_CONTROL 0xdbc -#define MV64340_PCI_0_HEADERS_RETARGET_CONTROL 0xF40 -#define MV64340_PCI_1_HEADERS_RETARGET_CONTROL 0xFc0 -#define MV64340_PCI_0_HEADERS_RETARGET_BASE 0xF44 -#define MV64340_PCI_1_HEADERS_RETARGET_BASE 0xFc4 -#define MV64340_PCI_0_HEADERS_RETARGET_HIGH 0xF48 -#define MV64340_PCI_1_HEADERS_RETARGET_HIGH 0xFc8 - -/***********************************/ -/* PCI Control Register Map */ -/***********************************/ - -#define MV64340_PCI_0_DLL_STATUS_AND_COMMAND 0x1d20 -#define MV64340_PCI_1_DLL_STATUS_AND_COMMAND 0x1da0 -#define MV64340_PCI_0_MPP_PADS_DRIVE_CONTROL 0x1d1C -#define MV64340_PCI_1_MPP_PADS_DRIVE_CONTROL 0x1d9C -#define MV64340_PCI_0_COMMAND 0xc00 -#define MV64340_PCI_1_COMMAND 0xc80 -#define MV64340_PCI_0_MODE 0xd00 -#define MV64340_PCI_1_MODE 0xd80 -#define MV64340_PCI_0_RETRY 0xc04 -#define MV64340_PCI_1_RETRY 0xc84 -#define MV64340_PCI_0_READ_BUFFER_DISCARD_TIMER 0xd04 -#define MV64340_PCI_1_READ_BUFFER_DISCARD_TIMER 0xd84 -#define MV64340_PCI_0_MSI_TRIGGER_TIMER 0xc38 -#define MV64340_PCI_1_MSI_TRIGGER_TIMER 0xcb8 -#define MV64340_PCI_0_ARBITER_CONTROL 0x1d00 -#define MV64340_PCI_1_ARBITER_CONTROL 0x1d80 -#define MV64340_PCI_0_CROSS_BAR_CONTROL_LOW 0x1d08 -#define MV64340_PCI_1_CROSS_BAR_CONTROL_LOW 0x1d88 -#define MV64340_PCI_0_CROSS_BAR_CONTROL_HIGH 0x1d0c -#define MV64340_PCI_1_CROSS_BAR_CONTROL_HIGH 0x1d8c -#define MV64340_PCI_0_CROSS_BAR_TIMEOUT 0x1d04 -#define MV64340_PCI_1_CROSS_BAR_TIMEOUT 0x1d84 -#define MV64340_PCI_0_SYNC_BARRIER_TRIGGER_REG 0x1D18 -#define MV64340_PCI_1_SYNC_BARRIER_TRIGGER_REG 0x1D98 -#define MV64340_PCI_0_SYNC_BARRIER_VIRTUAL_REG 0x1d10 -#define MV64340_PCI_1_SYNC_BARRIER_VIRTUAL_REG 0x1d90 -#define MV64340_PCI_0_P2P_CONFIG 0x1d14 -#define MV64340_PCI_1_P2P_CONFIG 0x1d94 - -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_LOW 0x1e00 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_HIGH 0x1e04 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_0 0x1e08 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_LOW 0x1e10 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_HIGH 0x1e14 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_1 0x1e18 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_LOW 0x1e20 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_HIGH 0x1e24 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_2 0x1e28 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_LOW 0x1e30 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_HIGH 0x1e34 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_3 0x1e38 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_LOW 0x1e40 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_HIGH 0x1e44 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_4 0x1e48 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_LOW 0x1e50 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_HIGH 0x1e54 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_5 0x1e58 - -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_LOW 0x1e80 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_HIGH 0x1e84 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_0 0x1e88 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_LOW 0x1e90 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_HIGH 0x1e94 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_1 0x1e98 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_LOW 0x1ea0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_HIGH 0x1ea4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_2 0x1ea8 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_LOW 0x1eb0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_HIGH 0x1eb4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_3 0x1eb8 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_LOW 0x1ec0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_HIGH 
0x1ec4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_4 0x1ec8 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_5_LOW 0x1ed0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_5_HIGH 0x1ed4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_5 0x1ed8 - -/****************************************/ -/* PCI Configuration Access Registers */ -/****************************************/ - -#define MV64340_PCI_0_CONFIG_ADDR 0xcf8 -#define MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG 0xcfc -#define MV64340_PCI_1_CONFIG_ADDR 0xc78 -#define MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG 0xc7c -#define MV64340_PCI_0_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xc34 -#define MV64340_PCI_1_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xcb4 - -/****************************************/ -/* PCI Error Report Registers */ -/****************************************/ - -#define MV64340_PCI_0_SERR_MASK 0xc28 -#define MV64340_PCI_1_SERR_MASK 0xca8 -#define MV64340_PCI_0_ERROR_ADDR_LOW 0x1d40 -#define MV64340_PCI_1_ERROR_ADDR_LOW 0x1dc0 -#define MV64340_PCI_0_ERROR_ADDR_HIGH 0x1d44 -#define MV64340_PCI_1_ERROR_ADDR_HIGH 0x1dc4 -#define MV64340_PCI_0_ERROR_ATTRIBUTE 0x1d48 -#define MV64340_PCI_1_ERROR_ATTRIBUTE 0x1dc8 -#define MV64340_PCI_0_ERROR_COMMAND 0x1d50 -#define MV64340_PCI_1_ERROR_COMMAND 0x1dd0 -#define MV64340_PCI_0_ERROR_CAUSE 0x1d58 -#define MV64340_PCI_1_ERROR_CAUSE 0x1dd8 -#define MV64340_PCI_0_ERROR_MASK 0x1d5c -#define MV64340_PCI_1_ERROR_MASK 0x1ddc - -/****************************************/ -/* PCI Debug Registers */ -/****************************************/ - -#define MV64340_PCI_0_MMASK 0X1D24 -#define MV64340_PCI_1_MMASK 0X1DA4 - -/*********************************************/ -/* PCI Configuration, Function 0, Registers */ -/*********************************************/ - -#define MV64340_PCI_DEVICE_AND_VENDOR_ID 0x000 -#define MV64340_PCI_STATUS_AND_COMMAND 0x004 -#define MV64340_PCI_CLASS_CODE_AND_REVISION_ID 0x008 -#define MV64340_PCI_BIST_HEADER_TYPE_LATENCY_TIMER_CACHE_LINE 0x00C - -#define MV64340_PCI_SCS_0_BASE_ADDR_LOW 0x010 -#define MV64340_PCI_SCS_0_BASE_ADDR_HIGH 0x014 -#define MV64340_PCI_SCS_1_BASE_ADDR_LOW 0x018 -#define MV64340_PCI_SCS_1_BASE_ADDR_HIGH 0x01C -#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_LOW 0x020 -#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_HIGH 0x024 -#define MV64340_PCI_SUBSYSTEM_ID_AND_SUBSYSTEM_VENDOR_ID 0x02c -#define MV64340_PCI_EXPANSION_ROM_BASE_ADDR_REG 0x030 -#define MV64340_PCI_CAPABILTY_LIST_POINTER 0x034 -#define MV64340_PCI_INTERRUPT_PIN_AND_LINE 0x03C - /* capability list */ -#define MV64340_PCI_POWER_MANAGEMENT_CAPABILITY 0x040 -#define MV64340_PCI_POWER_MANAGEMENT_STATUS_AND_CONTROL 0x044 -#define MV64340_PCI_VPD_ADDR 0x048 -#define MV64340_PCI_VPD_DATA 0x04c -#define MV64340_PCI_MSI_MESSAGE_CONTROL 0x050 -#define MV64340_PCI_MSI_MESSAGE_ADDR 0x054 -#define MV64340_PCI_MSI_MESSAGE_UPPER_ADDR 0x058 -#define MV64340_PCI_MSI_MESSAGE_DATA 0x05c -#define MV64340_PCI_X_COMMAND 0x060 -#define MV64340_PCI_X_STATUS 0x064 -#define MV64340_PCI_COMPACT_PCI_HOT_SWAP 0x068 - -/***********************************************/ -/* PCI Configuration, Function 1, Registers */ -/***********************************************/ - -#define MV64340_PCI_SCS_2_BASE_ADDR_LOW 0x110 -#define MV64340_PCI_SCS_2_BASE_ADDR_HIGH 0x114 -#define MV64340_PCI_SCS_3_BASE_ADDR_LOW 0x118 -#define MV64340_PCI_SCS_3_BASE_ADDR_HIGH 0x11c -#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_LOW 0x120 -#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_HIGH 0x124 - -/***********************************************/ -/* PCI Configuration, 
Function 2, Registers */ -/***********************************************/ - -#define MV64340_PCI_DEVCS_0_BASE_ADDR_LOW 0x210 -#define MV64340_PCI_DEVCS_0_BASE_ADDR_HIGH 0x214 -#define MV64340_PCI_DEVCS_1_BASE_ADDR_LOW 0x218 -#define MV64340_PCI_DEVCS_1_BASE_ADDR_HIGH 0x21c -#define MV64340_PCI_DEVCS_2_BASE_ADDR_LOW 0x220 -#define MV64340_PCI_DEVCS_2_BASE_ADDR_HIGH 0x224 - -/***********************************************/ -/* PCI Configuration, Function 3, Registers */ -/***********************************************/ - -#define MV64340_PCI_DEVCS_3_BASE_ADDR_LOW 0x310 -#define MV64340_PCI_DEVCS_3_BASE_ADDR_HIGH 0x314 -#define MV64340_PCI_BOOT_CS_BASE_ADDR_LOW 0x318 -#define MV64340_PCI_BOOT_CS_BASE_ADDR_HIGH 0x31c -#define MV64340_PCI_CPU_BASE_ADDR_LOW 0x220 -#define MV64340_PCI_CPU_BASE_ADDR_HIGH 0x224 - -/***********************************************/ -/* PCI Configuration, Function 4, Registers */ -/***********************************************/ - -#define MV64340_PCI_P2P_MEM0_BASE_ADDR_LOW 0x410 -#define MV64340_PCI_P2P_MEM0_BASE_ADDR_HIGH 0x414 -#define MV64340_PCI_P2P_MEM1_BASE_ADDR_LOW 0x418 -#define MV64340_PCI_P2P_MEM1_BASE_ADDR_HIGH 0x41c -#define MV64340_PCI_P2P_I_O_BASE_ADDR 0x420 -#define MV64340_PCI_INTERNAL_REGS_I_O_MAPPED_BASE_ADDR 0x424 - -/****************************************/ -/* Messaging Unit Registers (I20) */ -/****************************************/ - -#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_0_SIDE 0x010 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_0_SIDE 0x014 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_0_SIDE 0x018 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_0_SIDE 0x01C -#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_0_SIDE 0x020 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x024 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x028 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_0_SIDE 0x02C -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x030 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x034 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x040 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x044 -#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_0_SIDE 0x050 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_0_SIDE 0x054 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x060 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x064 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x068 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x06C -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x070 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x074 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x0F8 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x0FC - -#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_1_SIDE 0x090 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_1_SIDE 0x094 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_1_SIDE 0x098 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_1_SIDE 0x09C -#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_1_SIDE 0x0A0 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0A4 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0A8 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_1_SIDE 0x0AC -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0B0 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0B4 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C0 -#define 
MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C4 -#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_1_SIDE 0x0D0 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_1_SIDE 0x0D4 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0E0 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0E4 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x0E8 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x0EC -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0F0 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0F4 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x078 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x07C - -#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C10 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C14 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C18 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C1C -#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU0_SIDE 0x1C20 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C24 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C28 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU0_SIDE 0x1C2C -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C30 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C34 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C40 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C44 -#define MV64340_I2O_QUEUE_CONTROL_REG_CPU0_SIDE 0x1C50 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU0_SIDE 0x1C54 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C60 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C64 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1C68 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1C6C -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C70 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C74 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1CF8 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1CFC -#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C90 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C94 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C98 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C9C -#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU1_SIDE 0x1CA0 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CA4 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CA8 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU1_SIDE 0x1CAC -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CB0 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CB4 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC0 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC4 -#define MV64340_I2O_QUEUE_CONTROL_REG_CPU1_SIDE 0x1CD0 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU1_SIDE 0x1CD4 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CE0 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CE4 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1CE8 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1CEC -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CF0 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CF4 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1C78 -#define 
MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1C7C - -/****************************************/ -/* Ethernet Unit Registers */ -/****************************************/ - -/*******************************************/ -/* CUNIT Registers */ -/*******************************************/ - - /* Address Decoding Register Map */ - -#define MV64340_CUNIT_BASE_ADDR_REG0 0xf200 -#define MV64340_CUNIT_BASE_ADDR_REG1 0xf208 -#define MV64340_CUNIT_BASE_ADDR_REG2 0xf210 -#define MV64340_CUNIT_BASE_ADDR_REG3 0xf218 -#define MV64340_CUNIT_SIZE0 0xf204 -#define MV64340_CUNIT_SIZE1 0xf20c -#define MV64340_CUNIT_SIZE2 0xf214 -#define MV64340_CUNIT_SIZE3 0xf21c -#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG0 0xf240 -#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG1 0xf244 -#define MV64340_CUNIT_BASE_ADDR_ENABLE_REG 0xf250 -#define MV64340_MPSC0_ACCESS_PROTECTION_REG 0xf254 -#define MV64340_MPSC1_ACCESS_PROTECTION_REG 0xf258 -#define MV64340_CUNIT_INTERNAL_SPACE_BASE_ADDR_REG 0xf25C - - /* Error Report Registers */ - -#define MV64340_CUNIT_INTERRUPT_CAUSE_REG 0xf310 -#define MV64340_CUNIT_INTERRUPT_MASK_REG 0xf314 -#define MV64340_CUNIT_ERROR_ADDR 0xf318 - - /* Cunit Control Registers */ - -#define MV64340_CUNIT_ARBITER_CONTROL_REG 0xf300 -#define MV64340_CUNIT_CONFIG_REG 0xb40c -#define MV64340_CUNIT_CRROSBAR_TIMEOUT_REG 0xf304 - - /* Cunit Debug Registers */ - -#define MV64340_CUNIT_DEBUG_LOW 0xf340 -#define MV64340_CUNIT_DEBUG_HIGH 0xf344 -#define MV64340_CUNIT_MMASK 0xf380 - - /* MPSCs Clocks Routing Registers */ - -#define MV64340_MPSC_ROUTING_REG 0xb400 -#define MV64340_MPSC_RX_CLOCK_ROUTING_REG 0xb404 -#define MV64340_MPSC_TX_CLOCK_ROUTING_REG 0xb408 - - /* MPSCs Interrupts Registers */ - -#define MV64340_MPSC_CAUSE_REG(port) (0xb804 + (port<<3)) -#define MV64340_MPSC_MASK_REG(port) (0xb884 + (port<<3)) - -#define MV64340_MPSC_MAIN_CONFIG_LOW(port) (0x8000 + (port<<12)) -#define MV64340_MPSC_MAIN_CONFIG_HIGH(port) (0x8004 + (port<<12)) -#define MV64340_MPSC_PROTOCOL_CONFIG(port) (0x8008 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG1(port) (0x800c + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG2(port) (0x8010 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG3(port) (0x8014 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG4(port) (0x8018 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG5(port) (0x801c + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG6(port) (0x8020 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG7(port) (0x8024 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG8(port) (0x8028 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG9(port) (0x802c + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG10(port) (0x8030 + (port<<12)) - - /* MPSC0 Registers */ - - -/***************************************/ -/* SDMA Registers */ -/***************************************/ - -#define MV64340_SDMA_CONFIG_REG(channel) (0x4000 + (channel<<13)) -#define MV64340_SDMA_COMMAND_REG(channel) (0x4008 + (channel<<13)) -#define MV64340_SDMA_CURRENT_RX_DESCRIPTOR_POINTER(channel) (0x4810 + (channel<<13)) -#define MV64340_SDMA_CURRENT_TX_DESCRIPTOR_POINTER(channel) (0x4c10 + (channel<<13)) -#define MV64340_SDMA_FIRST_TX_DESCRIPTOR_POINTER(channel) (0x4c14 + (channel<<13)) - -#define MV64340_SDMA_CAUSE_REG 0xb800 -#define MV64340_SDMA_MASK_REG 0xb880 - -/* BRG Interrupts */ - -#define MV64340_BRG_CONFIG_REG(brg) (0xb200 + (brg<<3)) -#define MV64340_BRG_BAUDE_TUNING_REG(brg) (0xb208 + (brg<<3)) -#define MV64340_BRG_CAUSE_REG 0xb834 -#define MV64340_BRG_MASK_REG 0xb8b4 - -/****************************************/ -/* DMA Channel 
Control */ -/****************************************/ - -#define MV64340_DMA_CHANNEL0_CONTROL 0x840 -#define MV64340_DMA_CHANNEL0_CONTROL_HIGH 0x880 -#define MV64340_DMA_CHANNEL1_CONTROL 0x844 -#define MV64340_DMA_CHANNEL1_CONTROL_HIGH 0x884 -#define MV64340_DMA_CHANNEL2_CONTROL 0x848 -#define MV64340_DMA_CHANNEL2_CONTROL_HIGH 0x888 -#define MV64340_DMA_CHANNEL3_CONTROL 0x84C -#define MV64340_DMA_CHANNEL3_CONTROL_HIGH 0x88C - - -/****************************************/ -/* IDMA Registers */ -/****************************************/ - -#define MV64340_DMA_CHANNEL0_BYTE_COUNT 0x800 -#define MV64340_DMA_CHANNEL1_BYTE_COUNT 0x804 -#define MV64340_DMA_CHANNEL2_BYTE_COUNT 0x808 -#define MV64340_DMA_CHANNEL3_BYTE_COUNT 0x80C -#define MV64340_DMA_CHANNEL0_SOURCE_ADDR 0x810 -#define MV64340_DMA_CHANNEL1_SOURCE_ADDR 0x814 -#define MV64340_DMA_CHANNEL2_SOURCE_ADDR 0x818 -#define MV64340_DMA_CHANNEL3_SOURCE_ADDR 0x81c -#define MV64340_DMA_CHANNEL0_DESTINATION_ADDR 0x820 -#define MV64340_DMA_CHANNEL1_DESTINATION_ADDR 0x824 -#define MV64340_DMA_CHANNEL2_DESTINATION_ADDR 0x828 -#define MV64340_DMA_CHANNEL3_DESTINATION_ADDR 0x82C -#define MV64340_DMA_CHANNEL0_NEXT_DESCRIPTOR_POINTER 0x830 -#define MV64340_DMA_CHANNEL1_NEXT_DESCRIPTOR_POINTER 0x834 -#define MV64340_DMA_CHANNEL2_NEXT_DESCRIPTOR_POINTER 0x838 -#define MV64340_DMA_CHANNEL3_NEXT_DESCRIPTOR_POINTER 0x83C -#define MV64340_DMA_CHANNEL0_CURRENT_DESCRIPTOR_POINTER 0x870 -#define MV64340_DMA_CHANNEL1_CURRENT_DESCRIPTOR_POINTER 0x874 -#define MV64340_DMA_CHANNEL2_CURRENT_DESCRIPTOR_POINTER 0x878 -#define MV64340_DMA_CHANNEL3_CURRENT_DESCRIPTOR_POINTER 0x87C - - /* IDMA Address Decoding Base Address Registers */ - -#define MV64340_DMA_BASE_ADDR_REG0 0xa00 -#define MV64340_DMA_BASE_ADDR_REG1 0xa08 -#define MV64340_DMA_BASE_ADDR_REG2 0xa10 -#define MV64340_DMA_BASE_ADDR_REG3 0xa18 -#define MV64340_DMA_BASE_ADDR_REG4 0xa20 -#define MV64340_DMA_BASE_ADDR_REG5 0xa28 -#define MV64340_DMA_BASE_ADDR_REG6 0xa30 -#define MV64340_DMA_BASE_ADDR_REG7 0xa38 - - /* IDMA Address Decoding Size Address Register */ - -#define MV64340_DMA_SIZE_REG0 0xa04 -#define MV64340_DMA_SIZE_REG1 0xa0c -#define MV64340_DMA_SIZE_REG2 0xa14 -#define MV64340_DMA_SIZE_REG3 0xa1c -#define MV64340_DMA_SIZE_REG4 0xa24 -#define MV64340_DMA_SIZE_REG5 0xa2c -#define MV64340_DMA_SIZE_REG6 0xa34 -#define MV64340_DMA_SIZE_REG7 0xa3C - - /* IDMA Address Decoding High Address Remap and Access - Protection Registers */ - -#define MV64340_DMA_HIGH_ADDR_REMAP_REG0 0xa60 -#define MV64340_DMA_HIGH_ADDR_REMAP_REG1 0xa64 -#define MV64340_DMA_HIGH_ADDR_REMAP_REG2 0xa68 -#define MV64340_DMA_HIGH_ADDR_REMAP_REG3 0xa6C -#define MV64340_DMA_BASE_ADDR_ENABLE_REG 0xa80 -#define MV64340_DMA_CHANNEL0_ACCESS_PROTECTION_REG 0xa70 -#define MV64340_DMA_CHANNEL1_ACCESS_PROTECTION_REG 0xa74 -#define MV64340_DMA_CHANNEL2_ACCESS_PROTECTION_REG 0xa78 -#define MV64340_DMA_CHANNEL3_ACCESS_PROTECTION_REG 0xa7c -#define MV64340_DMA_ARBITER_CONTROL 0x860 -#define MV64340_DMA_CROSS_BAR_TIMEOUT 0x8d0 - - /* IDMA Headers Retarget Registers */ - -#define MV64340_DMA_HEADERS_RETARGET_CONTROL 0xa84 -#define MV64340_DMA_HEADERS_RETARGET_BASE 0xa88 - - /* IDMA Interrupt Register */ - -#define MV64340_DMA_INTERRUPT_CAUSE_REG 0x8c0 -#define MV64340_DMA_INTERRUPT_CAUSE_MASK 0x8c4 -#define MV64340_DMA_ERROR_ADDR 0x8c8 -#define MV64340_DMA_ERROR_SELECT 0x8cc - - /* IDMA Debug Register ( for internal use ) */ - -#define MV64340_DMA_DEBUG_LOW 0x8e0 -#define MV64340_DMA_DEBUG_HIGH 0x8e4 -#define MV64340_DMA_SPARE 0xA8C - 
-/****************************************/ -/* Timer_Counter */ -/****************************************/ - -#define MV64340_TIMER_COUNTER0 0x850 -#define MV64340_TIMER_COUNTER1 0x854 -#define MV64340_TIMER_COUNTER2 0x858 -#define MV64340_TIMER_COUNTER3 0x85C -#define MV64340_TIMER_COUNTER_0_3_CONTROL 0x864 -#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_CAUSE 0x868 -#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_MASK 0x86c - -/****************************************/ -/* Watchdog registers */ -/****************************************/ - -#define MV64340_WATCHDOG_CONFIG_REG 0xb410 -#define MV64340_WATCHDOG_VALUE_REG 0xb414 - -/****************************************/ -/* I2C Registers */ -/****************************************/ - -#define MV64XXX_I2C_OFFSET 0xc000 -#define MV64XXX_I2C_REG_BLOCK_SIZE 0x0020 - -/****************************************/ -/* GPP Interface Registers */ -/****************************************/ - -#define MV64340_GPP_IO_CONTROL 0xf100 -#define MV64340_GPP_LEVEL_CONTROL 0xf110 -#define MV64340_GPP_VALUE 0xf104 -#define MV64340_GPP_INTERRUPT_CAUSE 0xf108 -#define MV64340_GPP_INTERRUPT_MASK0 0xf10c -#define MV64340_GPP_INTERRUPT_MASK1 0xf114 -#define MV64340_GPP_VALUE_SET 0xf118 -#define MV64340_GPP_VALUE_CLEAR 0xf11c - -/****************************************/ -/* Interrupt Controller Registers */ -/****************************************/ - -/****************************************/ -/* Interrupts */ -/****************************************/ - -#define MV64340_MAIN_INTERRUPT_CAUSE_LOW 0x004 -#define MV64340_MAIN_INTERRUPT_CAUSE_HIGH 0x00c -#define MV64340_CPU_INTERRUPT0_MASK_LOW 0x014 -#define MV64340_CPU_INTERRUPT0_MASK_HIGH 0x01c -#define MV64340_CPU_INTERRUPT0_SELECT_CAUSE 0x024 -#define MV64340_CPU_INTERRUPT1_MASK_LOW 0x034 -#define MV64340_CPU_INTERRUPT1_MASK_HIGH 0x03c -#define MV64340_CPU_INTERRUPT1_SELECT_CAUSE 0x044 -#define MV64340_INTERRUPT0_MASK_0_LOW 0x054 -#define MV64340_INTERRUPT0_MASK_0_HIGH 0x05c -#define MV64340_INTERRUPT0_SELECT_CAUSE 0x064 -#define MV64340_INTERRUPT1_MASK_0_LOW 0x074 -#define MV64340_INTERRUPT1_MASK_0_HIGH 0x07c -#define MV64340_INTERRUPT1_SELECT_CAUSE 0x084 - -/****************************************/ -/* MPP Interface Registers */ -/****************************************/ - -#define MV64340_MPP_CONTROL0 0xf000 -#define MV64340_MPP_CONTROL1 0xf004 -#define MV64340_MPP_CONTROL2 0xf008 -#define MV64340_MPP_CONTROL3 0xf00c - -/****************************************/ -/* Serial Initialization registers */ -/****************************************/ - -#define MV64340_SERIAL_INIT_LAST_DATA 0xf324 -#define MV64340_SERIAL_INIT_CONTROL 0xf328 -#define MV64340_SERIAL_INIT_STATUS 0xf32c - -extern void mv64340_irq_init(unsigned int base); - -#endif /* __ASM_MV643XX_H */ -- cgit v1.2.3 From 636119af94f2fbf3e4458be66a1bc740ba69ce6d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 14 Sep 2024 08:51:15 -0600 Subject: io_uring: rename "copy buffers" to "clone buffers" A recent commit added support for copying registered buffers from one ring to another. But that term is a bit confusing, as no copying of buffer data is done here. What is being done is simply cloning the buffer registrations from one ring to another. Rename it while we still can, so that it's more descriptive. No functional changes in this patch. 
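For reference, a minimal user-space sketch of the renamed interface (raw syscall shown; descriptor names are illustrative and error handling is omitted):

	#include <linux/io_uring.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Clone the buffer registrations of src_ring_fd into dst_ring_fd. */
	static int clone_buffers(int dst_ring_fd, int src_ring_fd)
	{
		struct io_uring_clone_buffers arg = {
			.src_fd = src_ring_fd,
		};

		/* Only the registrations are cloned; no buffer data is copied. */
		return syscall(__NR_io_uring_register, dst_ring_fd,
			       IORING_REGISTER_CLONE_BUFFERS, &arg, 1);
	}

The struct layout (src_fd, flags, padding) is unchanged by the rename; only the opcode and type names differ.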
Fixes: 7cc2a6eadcd7 ("io_uring: add IORING_REGISTER_COPY_BUFFERS method") Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 9dc5bb428c8a..1fe79e750470 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -609,8 +609,8 @@ enum io_uring_register_op { IORING_REGISTER_CLOCK = 29, - /* copy registered buffers from source ring to current ring */ - IORING_REGISTER_COPY_BUFFERS = 30, + /* clone registered buffers from source ring to current ring */ + IORING_REGISTER_CLONE_BUFFERS = 30, /* this goes last */ IORING_REGISTER_LAST, @@ -701,7 +701,7 @@ enum { IORING_REGISTER_SRC_REGISTERED = 1, }; -struct io_uring_copy_buffers { +struct io_uring_clone_buffers { __u32 src_fd; __u32 flags; __u32 pad[6]; -- cgit v1.2.3 From 21d52e295ad2afc76bbd105da82a003b96f6ac77 Mon Sep 17 00:00:00 2001 From: Tahera Fahimi Date: Wed, 4 Sep 2024 18:13:55 -0600 Subject: landlock: Add abstract UNIX socket scoping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new "scoped" member to landlock_ruleset_attr that can specify LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET to restrict connection to abstract UNIX sockets from a process outside of the socket's domain. Two hooks are implemented to enforce these restrictions: unix_stream_connect and unix_may_send. Closes: https://github.com/landlock-lsm/linux/issues/7 Signed-off-by: Tahera Fahimi Link: https://lore.kernel.org/r/5f7ad85243b78427242275b93481cfc7c127764b.1725494372.git.fahimitahera@gmail.com [mic: Fix commit message formatting, improve documentation, simplify hook_unix_may_send(), and cosmetic fixes including rename of LANDLOCK_SCOPED_ABSTRACT_UNIX_SOCKET] Co-developed-by: Mickaël Salaün Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 2c8dbc74b955..70edd17bafdc 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -44,6 +44,12 @@ struct landlock_ruleset_attr { * flags`_). */ __u64 handled_access_net; + /** + * @scoped: Bitmask of scopes (cf. `Scope flags`_) + * restricting a Landlock domain from accessing outside + * resources (e.g. IPCs). + */ + __u64 scoped; }; /* @@ -274,4 +280,25 @@ struct landlock_net_port_attr { #define LANDLOCK_ACCESS_NET_BIND_TCP (1ULL << 0) #define LANDLOCK_ACCESS_NET_CONNECT_TCP (1ULL << 1) /* clang-format on */ + +/** + * DOC: scope + * + * Scope flags + * ~~~~~~~~~~~ + * + * These flags enable to isolate a sandboxed process from a set of IPC actions. + * Setting a flag for a ruleset will isolate the Landlock domain to forbid + * connections to resources outside the domain. + * + * Scopes: + * + * - %LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET: Restrict a sandboxed process from + * connecting to an abstract UNIX socket created by a process outside the + * related Landlock domain (e.g. a parent domain or a non-sandboxed process). 
+ */ +/* clang-format off */ +#define LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET (1ULL << 0) +/* clang-format on*/ + #endif /* _UAPI_LINUX_LANDLOCK_H */ -- cgit v1.2.3 From 54a6e6bbf3bef25c8eb65619edde70af49bd3db0 Mon Sep 17 00:00:00 2001 From: Tahera Fahimi Date: Fri, 6 Sep 2024 15:30:03 -0600 Subject: landlock: Add signal scoping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, a sandbox process is not restricted to sending a signal (e.g. SIGKILL) to a process outside the sandbox environment. The ability to send a signal for a sandboxed process should be scoped the same way abstract UNIX sockets are scoped. Therefore, we extend the "scoped" field in a ruleset with LANDLOCK_SCOPE_SIGNAL to specify that a ruleset will deny sending any signal from within a sandbox process to its parent (i.e. any parent sandbox or non-sandboxed processes). This patch adds file_set_fowner and file_free_security hooks to set and release a pointer to the file owner's domain. This pointer, fown_domain in landlock_file_security will be used in file_send_sigiotask to check if the process can send a signal. The ruleset_with_unknown_scope test is updated to support LANDLOCK_SCOPE_SIGNAL. This depends on two new changes: - commit 1934b212615d ("file: reclaim 24 bytes from f_owner"): replace container_of(fown, struct file, f_owner) with fown->file . - commit 26f204380a3c ("fs: Fix file_set_fowner LSM hook inconsistencies"): lock before calling the hook. Signed-off-by: Tahera Fahimi Closes: https://github.com/landlock-lsm/linux/issues/8 Link: https://lore.kernel.org/r/df2b4f880a2ed3042992689a793ea0951f6798a5.1725657727.git.fahimitahera@gmail.com [mic: Update landlock_get_current_domain()'s return type, improve and fix locking in hook_file_set_fowner(), simplify and fix sleepable call and locking issue in hook_file_send_sigiotask() and rebase on the latest VFS tree, simplify hook_task_kill() and quickly return when not sandboxed, improve comments, rename LANDLOCK_SCOPED_SIGNAL] Co-developed-by: Mickaël Salaün Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 70edd17bafdc..33745642f787 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -296,9 +296,12 @@ struct landlock_net_port_attr { * - %LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET: Restrict a sandboxed process from * connecting to an abstract UNIX socket created by a process outside the * related Landlock domain (e.g. a parent domain or a non-sandboxed process). + * - %LANDLOCK_SCOPE_SIGNAL: Restrict a sandboxed process from sending a signal + * to another process outside the domain. */ /* clang-format off */ #define LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET (1ULL << 0) +#define LANDLOCK_SCOPE_SIGNAL (1ULL << 1) /* clang-format on*/ #endif /* _UAPI_LINUX_LANDLOCK_H */ -- cgit v1.2.3 From 4208c562a27899212e8046080555e0f204e0579a Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 17 Sep 2024 10:24:57 +0530 Subject: block: remove bogus union The union around bi_integrity field is pointless. Remove it. 
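A union with a single member cannot change the size or the offsets of the surrounding structure, so dropping it is purely cosmetic; a stand-alone sketch (not kernel code) that demonstrates the point:

  #include <assert.h>
  #include <stddef.h>

  struct with_union {
          union {
                  void *bi_integrity;
          };
          unsigned short bi_vcnt;
  };

  struct without_union {
          void *bi_integrity;
          unsigned short bi_vcnt;
  };

  /* Layout is identical either way. */
  static_assert(sizeof(struct with_union) == sizeof(struct without_union),
                "single-member union adds nothing");
  static_assert(offsetof(struct with_union, bi_vcnt) ==
                offsetof(struct without_union, bi_vcnt),
                "member offsets are unchanged");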
Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20240917045457.429698-1-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 36ed96133217..6d3a0fff2a1d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -248,11 +248,9 @@ struct bio { struct bio_crypt_ctx *bi_crypt_context; #endif - union { #if defined(CONFIG_BLK_DEV_INTEGRITY) - struct bio_integrity_payload *bi_integrity; /* data integrity */ + struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif - }; unsigned short bi_vcnt; /* how many bio_vec's */ -- cgit v1.2.3 From 6857be5fecaebd9773ff27b6d29b6fff3b1abbce Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:35 -0400 Subject: mm: introduce ARCH_SUPPORTS_HUGE_PFNMAP and special bits to pmd/pud Patch series "mm: Support huge pfnmaps", v2. Overview ======== This series implements huge pfnmaps support for mm in general. Huge pfnmap allows e.g. VM_PFNMAP vmas to map in either PMD or PUD levels, similar to what we do with dax / thp / hugetlb so far to benefit from TLB hits. Now we extend that idea to PFN mappings, e.g. PCI MMIO bars where it can grow as large as 8GB or even bigger. Currently, only x86_64 (1G+2M) and arm64 (2M) are supported. The last patch (from Alex Williamson) will be the first user of huge pfnmap, so as to enable vfio-pci driver to fault in huge pfn mappings. Implementation ============== In reality, it's relatively simple to add such support comparing to many other types of mappings, because of PFNMAP's specialties when there's no vmemmap backing it, so that most of the kernel routines on huge mappings should simply already fail for them, like GUPs or old-school follow_page() (which is recently rewritten to be folio_walk* APIs by David). One trick here is that we're still unmature on PUDs in generic paths here and there, as DAX is so far the only user. This patchset will add the 2nd user of it. Hugetlb can be a 3rd user if the hugetlb unification work can go on smoothly, but to be discussed later. The other trick is how to allow gup-fast working for such huge mappings even if there's no direct sign of knowing whether it's a normal page or MMIO mapping. This series chose to keep the pte_special solution, so that it reuses similar idea on setting a special bit to pfnmap PMDs/PUDs so that gup-fast will be able to identify them and fail properly. Along the way, we'll also notice that the major pgtable pfn walker, aka, follow_pte(), will need to retire soon due to the fact that it only works with ptes. A new set of simple API is introduced (follow_pfnmap* API) to be able to do whatever follow_pte() can already do, plus that it can also process huge pfnmaps now. Half of this series is about that and converting all existing pfnmap walkers to use the new API properly. Hopefully the new API also looks better to avoid exposing e.g. pgtable lock details into the callers, so that it can be used in an even more straightforward way. Here, three more options will be introduced and involved in huge pfnmap: - ARCH_SUPPORTS_HUGE_PFNMAP Arch developers will need to select this option when huge pfnmap is supported in arch's Kconfig. After this patchset applied, both x86_64 and arm64 will start to enable it by default. 
- ARCH_SUPPORTS_PMD_PFNMAP / ARCH_SUPPORTS_PUD_PFNMAP These options are for driver developers to identify whether current arch / config supports huge pfnmaps, making decision on whether it can use the huge pfnmap APIs to inject them. One can refer to the last vfio-pci patch from Alex on the use of them properly in a device driver. So after the whole set applied, and if one would enable some dynamic debug lines in vfio-pci core files, we should observe things like: vfio-pci 0000:00:06.0: vfio_pci_mmap_huge_fault(,order = 9) BAR 0 page offset 0x0: 0x100 vfio-pci 0000:00:06.0: vfio_pci_mmap_huge_fault(,order = 9) BAR 0 page offset 0x200: 0x100 vfio-pci 0000:00:06.0: vfio_pci_mmap_huge_fault(,order = 9) BAR 0 page offset 0x400: 0x100 In this specific case, it says that vfio-pci faults in PMDs properly for a few BAR0 offsets. Patch Layout ============ Patch 1: Introduce the new options mentioned above for huge PFNMAPs Patch 2: A tiny cleanup Patch 3-8: Preparation patches for huge pfnmap (include introduce special bit for pmd/pud) Patch 9-16: Introduce follow_pfnmap*() API, use it everywhere, and then drop follow_pte() API Patch 17: Add huge pfnmap support for x86_64 Patch 18: Add huge pfnmap support for arm64 Patch 19: Add vfio-pci support for all kinds of huge pfnmaps (Alex) TODO ==== More architectures / More page sizes ------------------------------------ Currently only x86_64 (2M+1G) and arm64 (2M) are supported. There seems to have plan to support arm64 1G later on top of this series [2]. Any arch will need to first support THP / THP_1G, then provide a special bit in pmds/puds to support huge pfnmaps. remap_pfn_range() support ------------------------- Currently, remap_pfn_range() still only maps PTEs. With the new option, remap_pfn_range() can logically start to inject either PMDs or PUDs when the alignment requirements match on the VAs. When the support is there, it should be able to silently benefit all drivers that is using remap_pfn_range() in its mmap() handler on better TLB hit rate and overall faster MMIO accesses similar to processor on hugepages. More driver support ------------------- VFIO is so far the only consumer for the huge pfnmaps after this series applied. Besides above remap_pfn_range() generic optimization, device driver can also try to optimize its mmap() on a better VA alignment for either PMD/PUD sizes. This may, iiuc, normally require userspace changes, as the driver doesn't normally decide the VA to map a bar. But I don't think I know all the drivers to know the full picture. Credits all go to Alex on help testing the GPU/NIC use cases above. [0] https://lore.kernel.org/r/73ad9540-3fb8-4154-9a4f-30a0a2b03d41@lucifer.local [1] https://lore.kernel.org/r/20240807194812.819412-1-peterx@redhat.com [2] https://lore.kernel.org/r/498e0731-81a4-4f75-95b4-a8ad0bcc7665@huawei.com This patch (of 19): This patch introduces the option to introduce special pte bit into pmd/puds. Archs can start to define pmd_special / pud_special when supported by selecting the new option. Per-arch support will be added later. Before that, create fallbacks for these helpers so that they are always available. 
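For a rough idea of what the later per-arch enablement looks like (a sketch of the x86-64 side, shown only as an assumption about the follow-up patches), an architecture that selects ARCH_SUPPORTS_PMD_PFNMAP supplies the real helpers on top of its existing special software bit, while the generic fallbacks added by this patch keep the build working everywhere else:

  /* arch/x86/include/asm/pgtable.h (sketch) */
  static inline bool pmd_special(pmd_t pmd)
  {
          return pmd_flags(pmd) & _PAGE_SPECIAL;
  }

  static inline pmd_t pmd_mkspecial(pmd_t pmd)
  {
          return pmd_set_flags(pmd, _PAGE_SPECIAL);
  }

Generic code can then test pmd_special()/pud_special() unconditionally; on architectures that do not select the options the fallbacks simply report false and hand the entry back unmodified.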
Link: https://lkml.kernel.org/r/20240826204353.2228736-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20240826204353.2228736-2-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 39f6faace590..c2307a2d275d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2643,6 +2643,30 @@ static inline pte_t pte_mkspecial(pte_t pte) } #endif +#ifndef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) +{ + return false; +} + +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return pmd; +} +#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ + +#ifndef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) +{ + return false; +} + +static inline pud_t pud_mkspecial(pud_t pud) +{ + return pud; +} +#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { -- cgit v1.2.3 From ef713ec3a566d3e5e011c5d6201eb661ebf94c1f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:36 -0400 Subject: mm: drop is_huge_zero_pud() It constantly returns false since 2017. One assertion is added in 2019 but it should never have triggered, IOW it means what is checked should be asserted instead. If it didn't exist for 7 years maybe it's good idea to remove it and only add it when it comes. 
Link: https://lkml.kernel.org/r/20240826204353.2228736-3-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Cc: Matthew Wilcox Cc: Aneesh Kumar K.V Cc: Alexander Gordeev Cc: Alex Williamson Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e3896ebf0f94..ffca706bac81 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -433,11 +433,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); } -static inline bool is_huge_zero_pud(pud_t pud) -{ - return false; -} - struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); void mm_put_huge_zero_folio(struct mm_struct *mm); @@ -578,11 +573,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return false; } -static inline bool is_huge_zero_pud(pud_t pud) -{ - return false; -} - static inline void mm_put_huge_zero_folio(struct mm_struct *mm) { return; -- cgit v1.2.3 From 5dd40721f147e83733ad34848330913cb633046e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:38 -0400 Subject: mm: allow THP orders for PFNMAPs This enables PFNMAPs to be mapped at either pmd/pud layers. Generalize the dax case into vma_is_special_huge() so as to cover both. Meanwhile, rename the macro to THP_ORDERS_ALL_SPECIAL. Link: https://lkml.kernel.org/r/20240826204353.2228736-5-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Cc: Matthew Wilcox Cc: Gavin Shan Cc: Ryan Roberts Cc: Zi Yan Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ffca706bac81..0b0539f4ee1a 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -76,9 +76,9 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; /* * Mask of all large folio orders supported for file THP. Folios in a DAX * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to - * it. + * it. Same to PFNMAPs where there's neither page* nor pagecache. */ -#define THP_ORDERS_ALL_FILE_DAX \ +#define THP_ORDERS_ALL_SPECIAL \ (BIT(PMD_ORDER) | BIT(PUD_ORDER)) #define THP_ORDERS_ALL_FILE_DEFAULT \ ((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0)) @@ -87,7 +87,7 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; * Mask of all large folio orders supported for THP. 
*/ #define THP_ORDERS_ALL \ - (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DAX | THP_ORDERS_ALL_FILE_DEFAULT) + (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT) #define TVA_SMAPS (1 << 0) /* Will be used for procfs */ #define TVA_IN_PF (1 << 1) /* Page fault handler */ -- cgit v1.2.3 From 0515e022e167cfacf1fee092eb93aa9514e23c0a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:42 -0400 Subject: mm: always define pxx_pgprot() There're: - 8 archs (arc, arm64, include, mips, powerpc, s390, sh, x86) that support pte_pgprot(). - 2 archs (x86, sparc) that support pmd_pgprot(). - 1 arch (x86) that support pud_pgprot(). Always define them to be used in generic code, and then we don't need to fiddle with "#ifdef"s when doing so. Link: https://lkml.kernel.org/r/20240826204353.2228736-9-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 780f3b439d98..e8b2ac6bd2ae 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1956,6 +1956,18 @@ typedef unsigned int pgtbl_mod_mask; #define MAX_PTRS_PER_P4D PTRS_PER_P4D #endif +#ifndef pte_pgprot +#define pte_pgprot(x) ((pgprot_t) {0}) +#endif + +#ifndef pmd_pgprot +#define pmd_pgprot(x) ((pgprot_t) {0}) +#endif + +#ifndef pud_pgprot +#define pud_pgprot(x) ((pgprot_t) {0}) +#endif + /* description of effects of mapping type and prot in current implementation. * this is due to the limited x86 page protection hardware. The expected * behavior is in parens: -- cgit v1.2.3 From 6da8e9634bb7e3fdad9ae0e4db873a05036c4343 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:43 -0400 Subject: mm: new follow_pfnmap API Introduce a pair of APIs to follow pfn mappings to get entry information. It's very similar to what follow_pte() does before, but different in that it recognizes huge pfn mappings. 
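A sketch of the intended calling pattern (the surrounding driver logic, the locking comment and the error values are illustrative assumptions):

  struct follow_pfnmap_args args = {
          .vma     = vma,
          .address = addr,
  };
  unsigned long pfn;

  /* Caller holds the mmap lock, as it did for follow_pte(). */
  if (follow_pfnmap_start(&args))
          return -EFAULT;

  pfn = args.pfn;                 /* only stable until ..._end() */
  if (write && !args.writable) {
          follow_pfnmap_end(&args);
          return -EFAULT;
  }
  /* use pfn / args.pgprot while the walk is still "started" */
  follow_pfnmap_end(&args);

The output fields are only meaningful between the start and end calls, which also hide the page table lock details from the caller.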
Link: https://lkml.kernel.org/r/20240826204353.2228736-10-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c2307a2d275d..62bd89414897 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2373,6 +2373,37 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address, int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); +struct follow_pfnmap_args { + /** + * Inputs: + * @vma: Pointer to @vm_area_struct struct + * @address: the virtual address to walk + */ + struct vm_area_struct *vma; + unsigned long address; + /** + * Internals: + * + * The caller shouldn't touch any of these. + */ + spinlock_t *lock; + pte_t *ptep; + /** + * Outputs: + * + * @pfn: the PFN of the address + * @pgprot: the pgprot_t of the mapping + * @writable: whether the mapping is writable + * @special: whether the mapping is a special mapping (real PFN maps) + */ + unsigned long pfn; + pgprot_t pgprot; + bool writable; + bool special; +}; +int follow_pfnmap_start(struct follow_pfnmap_args *args); +void follow_pfnmap_end(struct follow_pfnmap_args *args); + extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); -- cgit v1.2.3 From b0a1c0d0edcd75a0f8ec5fd19dbd64b8d097f534 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:50 -0400 Subject: mm: remove follow_pte() follow_pte() users have been converted to follow_pfnmap*(). Remove the API. 
Link: https://lkml.kernel.org/r/20240826204353.2228736-17-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 62bd89414897..d750be768121 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2368,8 +2368,6 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); -int follow_pte(struct vm_area_struct *vma, unsigned long address, - pte_t **ptepp, spinlock_t **ptlp); int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); -- cgit v1.2.3 From 82ce8e2f31a1eb05b1527c3d807bea40031df913 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 7 Sep 2024 17:40:42 +0200 Subject: set_memory: add __must_check to generic stubs Following query shows that architectures that don't provide asm/set_memory.h don't use set_memory_...() functions. $ git grep set_memory_ alpha arc csky hexagon loongarch m68k microblaze mips nios2 openrisc parisc sh sparc um xtensa Following query shows that all core users of set_memory_...() functions always take returned value into account: $ git grep -w -e set_memory_ro -e set_memory_rw -e set_memory_x -e set_memory_nx -e set_memory_rox `find . -maxdepth 1 -type d | grep -v arch | grep /` set_memory_...() functions can fail, leaving the memory attributes unchanged. Make sure all callers check the returned code. 
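With __must_check in place, a conforming caller looks like the following sketch (the surrounding helper is hypothetical):

  static int harden_buffer(void *addr, int numpages)
  {
          int err;

          err = set_memory_ro((unsigned long)addr, numpages);
          if (err)
                  return err;     /* attributes left unchanged, propagate */

          return 0;
  }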
Link: https://github.com/KSPP/linux/issues/7 Link: https://lkml.kernel.org/r/6a89ffc69666de84721216947c6b6c7dcca39d7d.1725723347.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Cc: Arnd Bergmann Cc: Kees Cook Signed-off-by: Andrew Morton --- include/linux/set_memory.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 95ac8398ee72..e7aec20fb44f 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -8,10 +8,10 @@ #ifdef CONFIG_ARCH_HAS_SET_MEMORY #include #else -static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } -static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } -static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } -static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif #ifndef set_memory_rox -- cgit v1.2.3 From 325efb16da2c840e165d9b620fec8049d4d664cc Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 9 Sep 2024 11:21:18 +1200 Subject: mm: add nr argument in mem_cgroup_swapin_uncharge_swap() helper to support large folios With large folios swap-in, we might need to uncharge multiple entries all together, add nr argument in mem_cgroup_swapin_uncharge_swap(). For the existing two users, just pass nr=1. Link: https://lkml.kernel.org/r/20240908232119.2157-3-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: Chris Li Reviewed-by: Yosry Ahmed Cc: Shakeel Butt Cc: Baolin Wang Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Gao Xiang Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kairui Song Cc: Kairui Song Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Nhat Pham Cc: Ryan Roberts Cc: Sergey Senozhatsky Cc: Suren Baghdasaryan Cc: Yang Shi Cc: Chuanhua Han Cc: Kanchana P Sridhar Cc: Usama Arif Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2ef94c74847d..34d2da05f2f1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -699,7 +699,8 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); + +void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); void __mem_cgroup_uncharge(struct folio *folio); @@ -1206,7 +1207,7 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, return 0; } -static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) +static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr) { } -- cgit v1.2.3 From ed8d5b0ce1d738e13c60d6b1a901a56d832e5070 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2024 15:13:20 +0200 Subject: Revert "uprobes: use vm_special_mapping close() functionality" This reverts commit 08e28de1160a712724268fd33d77b32f1bc84d1c. 
A malicious application can munmap() its "[uprobes]" vma and in this case xol_mapping.close == uprobe_clear_state() will free the memory which can be used by another thread, or the same thread when it hits the uprobe bp afterwards. Link: https://lkml.kernel.org/r/20240911131320.GA3448@redhat.com Signed-off-by: Oleg Nesterov Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sven Schnelle Signed-off-by: Andrew Morton --- include/linux/uprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 493dc95d912c..b503fafb7fb3 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -126,6 +126,7 @@ extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); +extern void uprobe_clear_state(struct mm_struct *mm); extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); -- cgit v1.2.3 From aef79e189ba2b32f78bd35daf2c0b41f3868a321 Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Tue, 10 Sep 2024 13:16:25 +0800 Subject: i3c: master: support to adjust first broadcast address speed According to I3C spec 6.2 Timing Specification, the Open Drain High Period of SCL Clock timing for first broadcast address should be adjusted to 200ns at least. I3C device working as i2c device will see the broadcast to close its Spike Filter then change to work at I3C mode. After that I3C open drain SCL high level should be adjusted back. Signed-off-by: Carlos Song Reviewed-by: Miquel Raynal Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20240910051626.4052552-1-carlos.song@nxp.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 074f632868d9..2a1ed05d5782 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -277,6 +277,20 @@ enum i3c_bus_mode { I3C_BUS_MODE_MIXED_SLOW, }; +/** + * enum i3c_open_drain_speed - I3C open-drain speed + * @I3C_OPEN_DRAIN_SLOW_SPEED: Slow open-drain speed for sending the first + * broadcast address. The first broadcast address at this speed + * will be visible to all devices on the I3C bus. I3C devices + * working in I2C mode will turn off their spike filter when + * switching into I3C mode. + * @I3C_OPEN_DRAIN_NORMAL_SPEED: Normal open-drain speed in I3C bus mode. + */ +enum i3c_open_drain_speed { + I3C_OPEN_DRAIN_SLOW_SPEED, + I3C_OPEN_DRAIN_NORMAL_SPEED, +}; + /** * enum i3c_addr_slot_status - I3C address slot status * @I3C_ADDR_SLOT_FREE: address is free @@ -436,6 +450,7 @@ struct i3c_bus { * NULL. * @enable_hotjoin: enable hot join event detect. * @disable_hotjoin: disable hot join event detect. + * @set_speed: adjust I3C open drain mode timing. 
*/ struct i3c_master_controller_ops { int (*bus_init)(struct i3c_master_controller *master); @@ -464,6 +479,7 @@ struct i3c_master_controller_ops { struct i3c_ibi_slot *slot); int (*enable_hotjoin)(struct i3c_master_controller *master); int (*disable_hotjoin)(struct i3c_master_controller *master); + int (*set_speed)(struct i3c_master_controller *master, enum i3c_open_drain_speed speed); }; /** -- cgit v1.2.3 From f761fcdd289d07e8547fef7ac76c3760fc7803f2 Mon Sep 17 00:00:00 2001 From: Dongliang Cui Date: Wed, 18 Sep 2024 07:40:05 +0900 Subject: exfat: Implement sops->shutdown and ioctl We found that when writing a large file through buffer write, if the disk is inaccessible, exFAT does not return an error normally, which leads to the writing process not stopping properly. To easily reproduce this issue, you can follow the steps below: 1. format a device to exFAT and then mount (with a full disk erase) 2. dd if=/dev/zero of=/exfat_mount/test.img bs=1M count=8192 3. eject the device You may find that the dd process does not stop immediately and may continue for a long time. The root cause of this issue is that during buffer write process, exFAT does not need to access the disk to look up directory entries or the FAT table (whereas FAT would do) every time data is written. Instead, exFAT simply marks the buffer as dirty and returns, delegating the writeback operation to the writeback process. If the disk cannot be accessed at this time, the error will only be returned to the writeback process, and the original process will not receive the error, so it cannot be returned to the user side. When the disk cannot be accessed normally, an error should be returned to stop the writing process. Implement sops->shutdown and ioctl to shut down the file system when underlying block device is marked dead. Signed-off-by: Dongliang Cui Signed-off-by: Zhiguo Niu Signed-off-by: Namjae Jeon --- include/uapi/linux/exfat.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/uapi/linux/exfat.h (limited to 'include') diff --git a/include/uapi/linux/exfat.h b/include/uapi/linux/exfat.h new file mode 100644 index 000000000000..46d95b16fc4b --- /dev/null +++ b/include/uapi/linux/exfat.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2024 Unisoc Technologies Co., Ltd. + */ + +#ifndef _UAPI_LINUX_EXFAT_H +#define _UAPI_LINUX_EXFAT_H +#include +#include + +/* + * exfat-specific ioctl commands + */ + +#define EXFAT_IOC_SHUTDOWN _IOR('X', 125, __u32) + +/* + * Flags used by EXFAT_IOC_SHUTDOWN + */ + +#define EXFAT_GOING_DOWN_DEFAULT 0x0 /* default with full sync */ +#define EXFAT_GOING_DOWN_FULLSYNC 0x1 /* going down with full sync*/ +#define EXFAT_GOING_DOWN_NOSYNC 0x2 /* going down */ + +#endif /* _UAPI_LINUX_EXFAT_H */ -- cgit v1.2.3 From 9ba5dcc722de4390a1d3211b2ee3c864f84f5461 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Sep 2024 01:48:17 +0100 Subject: block: Remove unused blk_limits_io_{min,opt} blk_limits_io_min and blk_limits_io_opt are unused since the recent commit 0a94a469a4f0 ("dm: stop using blk_limits_io_{min,opt}") Remove them. Signed-off-by: Dr. 
David Alan Gilbert Link: https://lore.kernel.org/r/20240920004817.676216-1-linux@treblig.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 643c9020a35a..50c3b959da28 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -968,8 +968,6 @@ static inline void blk_queue_disable_write_zeroes(struct request_queue *q) /* * Access functions for manipulating queue properties */ -extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); -extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, -- cgit v1.2.3 From 65f666c6203600053478ce8e34a1db269a8701c9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 19 Sep 2024 10:17:09 +0800 Subject: lib/sbitmap: define swap_lock as raw_spinlock_t When called from sbitmap_queue_get(), sbitmap_deferred_clear() may be run with preempt disabled. In RT kernel, spin_lock() can sleep, then warning of "BUG: sleeping function called from invalid context" can be triggered. Fix it by replacing it with raw_spin_lock. Cc: Yang Yang Fixes: 72d04bdcf3f7 ("sbitmap: fix io hung due to race on sbitmap_word::cleared") Signed-off-by: Ming Lei Reviewed-by: Yang Yang Link: https://lore.kernel.org/r/20240919021709.511329-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index c09cdcc99471..189140bf11fc 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -40,7 +40,7 @@ struct sbitmap_word { /** * @swap_lock: serializes simultaneous updates of ->word and ->cleared */ - spinlock_t swap_lock; + raw_spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** -- cgit v1.2.3 From 652bfcb76fe689f4d85b6f3688025a87fb94f9a1 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 31 Jul 2024 15:43:13 +0800 Subject: KEYS: Remove unused declarations These declarations are never implemented, remove it. 
Signed-off-by: Yue Haibing Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/keys/dns_resolver-type.h | 4 ---- include/linux/key.h | 3 --- 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/keys/dns_resolver-type.h b/include/keys/dns_resolver-type.h index 218ca22fb056..1b89088a2837 100644 --- a/include/keys/dns_resolver-type.h +++ b/include/keys/dns_resolver-type.h @@ -12,8 +12,4 @@ extern struct key_type key_type_dns_resolver; -extern int request_dns_resolver_key(const char *description, - const char *callout_info, - char **data); - #endif /* _KEYS_DNS_RESOLVER_TYPE_H */ diff --git a/include/linux/key.h b/include/linux/key.h index 943a432da3ae..074dca3222b9 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -436,9 +436,6 @@ extern key_ref_t keyring_search(key_ref_t keyring, const char *description, bool recurse); -extern int keyring_add_key(struct key *keyring, - struct key *key); - extern int keyring_restrict(key_ref_t keyring, const char *type, const char *restriction); -- cgit v1.2.3 From 60749cbe3d8ae572a6c7dda675de3e8b25797a18 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Jul 2024 17:14:18 +1000 Subject: sunrpc: change sp_nrthreads from atomic_t to unsigned int. sp_nrthreads is only ever accessed under the service mutex nlmsvc_mutex nfs_callback_mutex nfsd_mutex so these is no need for it to be an atomic_t. The fact that all code using it is single-threaded means that we can simplify svc_pool_victim and remove the temporary elevation of sp_nrthreads. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e4fa25fafa97..99e9345d829e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -33,9 +33,9 @@ * node traffic on multi-node NUMA NFS servers. */ struct svc_pool { - unsigned int sp_id; /* pool id; also node id on NUMA */ + unsigned int sp_id; /* pool id; also node id on NUMA */ struct lwq sp_xprts; /* pending transports */ - atomic_t sp_nrthreads; /* # of threads in pool */ + unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ struct llist_head sp_idle_threads; /* idle server threads */ -- cgit v1.2.3 From 3391fc92db8e761f1a2df5612fcb999dac6bc00a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 16 Sep 2024 09:45:40 +1000 Subject: sunrpc: allow svc threads to fail initialisation cleanly If an svc thread needs to perform some initialisation that might fail, it has no good way to handle the failure. Before the thread can exit it must call svc_exit_thread(), but that requires the service mutex to be held. The thread cannot simply take the mutex as that could deadlock if there is a concurrent attempt to shut down all threads (which is unlikely, but not impossible). nfsd currently call svc_exit_thread() unprotected in the unlikely event that unshare_fs_struct() fails. We can clean this up by introducing svc_thread_init_status() by which an svc thread can report whether initialisation has succeeded. If it has, it continues normally into the action loop. If it has not, svc_thread_init_status() immediately aborts the thread. svc_start_kthread() waits for either of these to happen, and calls svc_exit_thread() (under the mutex) if the thread aborted. 
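A sketch of the resulting thread-function shape (the setup step and the loop body are placeholders, not real sunrpc code):

  static int my_svc_thread(void *data)
  {
          struct svc_rqst *rqstp = data;
          int err;

          err = do_thread_setup();        /* anything that may fail */

          /*
           * Report the result to svc_start_kthread(); if err is
           * non-zero this call does not return and the starter
           * performs the svc_exit_thread() under the service mutex.
           */
          svc_thread_init_status(rqstp, err);

          /* normal service loop, unchanged from before */
          while (!svc_thread_should_stop(rqstp))
                  svc_recv(rqstp);

          /* usual thread teardown follows here */
          return 0;
  }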
Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 99e9345d829e..c419a61f60e5 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -21,6 +21,7 @@ #include #include #include +#include /* * @@ -232,6 +233,11 @@ struct svc_rqst { struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + + int rq_err; /* Thread sets this to inidicate + * initialisation success. + */ + unsigned long bc_to_initval; unsigned int bc_to_retries; void ** rq_lease_breaker; /* The v4 client breaking a lease */ @@ -305,6 +311,31 @@ static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) return test_bit(RQ_VICTIM, &rqstp->rq_flags); } +/** + * svc_thread_init_status - report whether thread has initialised successfully + * @rqstp: the thread in question + * @err: errno code + * + * After performing any initialisation that could fail, and before starting + * normal work, each sunrpc svc_thread must call svc_thread_init_status() + * with an appropriate error, or zero. + * + * If zero is passed, the thread is ready and must continue until + * svc_thread_should_stop() returns true. If a non-zero error is passed + * the call will not return - the thread will exit. + */ +static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err) +{ + rqstp->rq_err = err; + /* memory barrier ensures assignment to error above is visible before + * waitqueue_active() test below completes. + */ + smp_mb(); + wake_up_var(&rqstp->rq_err); + if (err) + kthread_exit(1); +} + struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct svc_xprt *xprt; -- cgit v1.2.3 From 36ffa3d0de54c1cf516ea32a5ec556f5c9874795 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Jul 2024 11:47:23 +1000 Subject: nfsd: be more systematic about selecting error codes for internal use. Rather than using ad hoc values for internal errors (30000, 11000, ...) use 'enum' to sequentially allocate numbers starting from the first known available number - now visible as NFS4ERR_FIRST_FREE. The goal is values that are distinct from all be32 error codes. To get those we must first select integers that are not already used, then convert them with cpu_to_be32(). 
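The allocation pattern on the nfsd side then becomes roughly the following (the internal error names here are made up for illustration):

  /*
   * Internal-only errors: allocate plain integers after the last
   * protocol-defined value, then convert so the be32 forms cannot
   * collide with anything that goes on the wire.
   */
  enum {
          NFSD_INTERNAL_DROPIT = NFS4ERR_FIRST_FREE,
          NFSD_INTERNAL_REPLAY_CACHE,
  };

  #define nfserr_dropit        cpu_to_be32(NFSD_INTERNAL_DROPIT)
  #define nfserr_replay_cache  cpu_to_be32(NFSD_INTERNAL_REPLAY_CACHE)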
Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: Chuck Lever --- include/linux/nfs4.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index f9df88091c6d..8d7430d9f218 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -281,15 +281,18 @@ enum nfsstat4 { /* nfs42 */ NFS4ERR_PARTNER_NOTSUPP = 10088, NFS4ERR_PARTNER_NO_AUTH = 10089, - NFS4ERR_UNION_NOTSUPP = 10090, - NFS4ERR_OFFLOAD_DENIED = 10091, - NFS4ERR_WRONG_LFS = 10092, - NFS4ERR_BADLABEL = 10093, - NFS4ERR_OFFLOAD_NO_REQS = 10094, + NFS4ERR_UNION_NOTSUPP = 10090, + NFS4ERR_OFFLOAD_DENIED = 10091, + NFS4ERR_WRONG_LFS = 10092, + NFS4ERR_BADLABEL = 10093, + NFS4ERR_OFFLOAD_NO_REQS = 10094, /* xattr (RFC8276) */ - NFS4ERR_NOXATTR = 10095, - NFS4ERR_XATTR2BIG = 10096, + NFS4ERR_NOXATTR = 10095, + NFS4ERR_XATTR2BIG = 10096, + + /* can be used for internal errors */ + NFS4ERR_FIRST_FREE }; /* error codes for internal client use */ -- cgit v1.2.3 From c4de97f7c45434985e5dbf2d6ccc9eca676e37fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 29 Jul 2024 16:52:32 -0400 Subject: svcrdma: Handle device removal outside of the CM event handler Synchronously wait for all disconnects to complete to ensure the transports have divested all hardware resources before the underlying RDMA device can safely be removed. Reviewed-by: Sagi Grimberg Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 ++ include/trace/events/rpcrdma.h | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d33bab33099a..619fc0bd837a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -76,6 +77,7 @@ struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ struct list_head sc_accept_q; /* Conn. waiting accept */ + struct rpcrdma_notification sc_rn; /* removal notification */ int sc_ord; /* RDMA read limit */ int sc_max_send_sges; bool sc_snd_w_inv; /* OK to use Send With Invalidate */ diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index a96a985c49b3..e6a72646c507 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2172,6 +2172,29 @@ TRACE_EVENT(svcrdma_qp_error, ) ); +TRACE_EVENT(svcrdma_device_removal, + TP_PROTO( + const struct rdma_cm_id *id + ), + + TP_ARGS(id), + + TP_STRUCT__entry( + __string(name, id->device->name) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __assign_str(name); + memcpy(__entry->addr, &id->route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("device %s to be removed, disconnecting %pISpc\n", + __get_str(name), __entry->addr + ) +); + DECLARE_EVENT_CLASS(svcrdma_sendqueue_class, TP_PROTO( const struct svcxprt_rdma *rdma, -- cgit v1.2.3 From 4b132aacb0768ac1e652cf517097ea6f237214b9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Sep 2024 14:08:13 -0400 Subject: tools: Add xdrgen Add a Python-based tool for translating XDR specifications into XDR encoder and decoder functions written in the Linux kernel's C coding style. The generator attempts to match the usual C coding style of the Linux kernel's SunRPC consumers. 
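For a flavour of the intended output, here is a hand-written approximation (not actual generator output) of the decoder such a tool would emit for a trivial XDR struct, built on the xdrgen_* primitives added below:

  /* XDR:  struct timestamp { unsigned int sec; unsigned int nsec; }; */
  static bool
  xdrgen_decode_timestamp(struct xdr_stream *xdr, struct timestamp *ptr)
  {
          if (!xdrgen_decode_unsigned_int(xdr, &ptr->sec))
                  return false;
          if (!xdrgen_decode_unsigned_int(xdr, &ptr->nsec))
                  return false;
          return true;
  }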
This approach is similar to the netlink code generator in tools/net/ynl . The maintainability benefits of machine-generated XDR code include: - Stronger type checking - Reduces the number of bugs introduced by human error - Makes the XDR code easier to audit and analyze - Enables rapid prototyping of new RPC-based protocols - Hardens the layering between protocol logic and marshaling - Makes it easier to add observability on demand - Unit tests might be built for both the tool and (automatically) for the generated code In addition, converting the XDR layer to use memory-safe languages such as Rust will be easier if much of the code can be converted automatically. Tested-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdrgen/_builtins.h | 243 ++++++++++++++++++++++++++++++++ include/linux/sunrpc/xdrgen/_defs.h | 26 ++++ 2 files changed, 269 insertions(+) create mode 100644 include/linux/sunrpc/xdrgen/_builtins.h create mode 100644 include/linux/sunrpc/xdrgen/_defs.h (limited to 'include') diff --git a/include/linux/sunrpc/xdrgen/_builtins.h b/include/linux/sunrpc/xdrgen/_builtins.h new file mode 100644 index 000000000000..68746c59fc9a --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_builtins.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. + */ + +#ifndef _SUNRPC_XDRGEN__BUILTINS_H_ +#define _SUNRPC_XDRGEN__BUILTINS_H_ + +#include + +static inline bool +xdrgen_decode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_encode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_decode_bool(struct xdr_stream *xdr, bool *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = (*p != xdr_zero); + return true; +} + +static inline bool +xdrgen_encode_bool(struct xdr_stream *xdr, bool val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = val ? 
xdr_one : xdr_zero; + return true; +} + +static inline bool +xdrgen_decode_int(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_int(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_int(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_int(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_long(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_long(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_long(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_long(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_hyper(struct xdr_stream *xdr, s64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_hyper(struct xdr_stream *xdr, s64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_unsigned_hyper(struct xdr_stream *xdr, u64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_hyper(struct xdr_stream *xdr, u64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_string(struct xdr_stream *xdr, string *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) != XDR_UNIT)) + return false; + if (unlikely(maxlen && len > maxlen)) + return false; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (unsigned char *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_string(struct xdr_stream *xdr, string val, u32 maxlen) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +static inline bool +xdrgen_decode_opaque(struct xdr_stream *xdr, opaque *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) != XDR_UNIT)) + return false; + if (unlikely(maxlen && len > maxlen)) + 
return false; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (u8 *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_opaque(struct xdr_stream *xdr, opaque val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +#endif /* _SUNRPC_XDRGEN__BUILTINS_H_ */ diff --git a/include/linux/sunrpc/xdrgen/_defs.h b/include/linux/sunrpc/xdrgen/_defs.h new file mode 100644 index 000000000000..be9e62371758 --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_defs.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. + */ + +#ifndef _SUNRPC_XDRGEN__DEFS_H_ +#define _SUNRPC_XDRGEN__DEFS_H_ + +#define TRUE (true) +#define FALSE (false) + +typedef struct { + u32 len; + unsigned char *data; +} string; + +typedef struct { + u32 len; + u8 *data; +} opaque; + +#endif /* _SUNRPC_XDRGEN__DEFS_H_ */ -- cgit v1.2.3 From 663ad8b1df8724cd5e01df66ea67ce0424fbcdf6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 10 Sep 2024 15:31:19 -0400 Subject: xdrgen: Fix return code checking in built-in XDR decoders xdr_stream_encode_u32() returns XDR_UNIT on success. xdr_stream_decode_u32() returns zero or -EMSGSIZE, but never XDR_UNIT. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdrgen/_builtins.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdrgen/_builtins.h b/include/linux/sunrpc/xdrgen/_builtins.h index 68746c59fc9a..66ca3ece951a 100644 --- a/include/linux/sunrpc/xdrgen/_builtins.h +++ b/include/linux/sunrpc/xdrgen/_builtins.h @@ -184,7 +184,7 @@ xdrgen_decode_string(struct xdr_stream *xdr, string *ptr, u32 maxlen) __be32 *p; u32 len; - if (unlikely(xdr_stream_decode_u32(xdr, &len) != XDR_UNIT)) + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return false; if (unlikely(maxlen && len > maxlen)) return false; @@ -215,7 +215,7 @@ xdrgen_decode_opaque(struct xdr_stream *xdr, opaque *ptr, u32 maxlen) __be32 *p; u32 len; - if (unlikely(xdr_stream_decode_u32(xdr, &len) != XDR_UNIT)) + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return false; if (unlikely(maxlen && len > maxlen)) return false; -- cgit v1.2.3 From bb0e391975f8da826305cbaa3e3d34b03c47e2a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 22 Sep 2024 09:10:17 +0200 Subject: dma-mapping: fix vmap and mmap of noncontiougs allocations Commit b5c58b2fdc42 ("dma-mapping: direct calls for dma-iommu") switched to use direct calls to dma-iommu, but missed the dma_vmap_noncontiguous, dma_vunmap_noncontiguous and dma_mmap_noncontiguous behavior keyed off the presence of the alloc_noncontiguous method. Fix this by removing the now unused alloc_noncontiguous and free_noncontiguous methods and moving the vmapping and mmaping of the noncontiguous allocations into the iommu code, as it is the only provider of actually noncontiguous allocations. 
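The resulting dispatch in kernel/dma/mapping.c is then roughly the following sketch (the dma-direct branch is an assumption about the non-IOMMU case, where the "noncontiguous" allocation is in fact a single contiguous region):

  void *dma_vmap_noncontiguous(struct device *dev, size_t size,
                               struct sg_table *sgt)
  {
          if (use_dma_iommu(dev))
                  return iommu_dma_vmap_noncontiguous(dev, size, sgt);

          /* dma-direct: already contiguous, no vmap needed */
          return page_address(sg_page(sgt->sgl));
  }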
Fixes: b5c58b2fdc42 ("dma-mapping: direct calls for dma-iommu") Reported-by: Xi Ruoyao Signed-off-by: Christoph Hellwig Reviewed-by: Leon Romanovsky Tested-by: Xi Ruoyao --- include/linux/dma-map-ops.h | 19 ------------------- include/linux/iommu-dma.h | 6 ++++++ 2 files changed, 6 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 9668ddf3696e..b7773201414c 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -24,11 +24,6 @@ struct dma_map_ops { gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir); - struct sg_table *(*alloc_noncontiguous)(struct device *dev, size_t size, - enum dma_data_direction dir, gfp_t gfp, - unsigned long attrs); - void (*free_noncontiguous)(struct device *dev, size_t size, - struct sg_table *sgt, enum dma_data_direction dir); int (*mmap)(struct device *, struct vm_area_struct *, void *, dma_addr_t, size_t, unsigned long attrs); @@ -206,20 +201,6 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, } #endif /* CONFIG_DMA_GLOBAL_POOL */ -/* - * This is the actual return value from the ->alloc_noncontiguous method. - * The users of the DMA API should only care about the sg_table, but to make - * the DMA-API internal vmaping and freeing easier we stash away the page - * array as well (except for the fallback case). This can go away any time, - * e.g. when a vmap-variant that takes a scatterlist comes along. - */ -struct dma_sgt_handle { - struct sg_table sgt; - struct page **pages; -}; -#define sgt_handle(sgt) \ - container_of((sgt), struct dma_sgt_handle, sgt) - int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index 1bb55ca1ab79..7bf145a52d6a 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -44,6 +44,12 @@ struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); void iommu_dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir); +void *iommu_dma_vmap_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt); +#define iommu_dma_vunmap_noncontiguous(dev, vaddr) \ + vunmap(vaddr); +int iommu_dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, + size_t size, struct sg_table *sgt); void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, -- cgit v1.2.3 From 3d09ff45469eb2912ce2227c47efac297230d6d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 22 Sep 2024 09:21:10 +0200 Subject: iommu/dma: remove most stubs in iommu-dma.h The direct calls from mapping.c all guarded by use_dma_iommu(), so don't bother to provide stubs, but instead just expose the prototypes unconditionally. 
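Each of those call sites has the shape sketched below, so with CONFIG_IOMMU_DMA disabled the constant-false use_dma_iommu() lets the compiler drop the call entirely and only the prototype is required (the dma-direct branch shown is illustrative):

  if (use_dma_iommu(dev))
          iommu_dma_sync_single_for_cpu(dev, addr, size, dir);
  else
          dma_direct_sync_single_for_cpu(dev, addr, size, dir);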
Signed-off-by: Christoph Hellwig Reviewed-by: Leon Romanovsky --- include/linux/iommu-dma.h | 108 ++++------------------------------------------ 1 file changed, 8 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index 7bf145a52d6a..508beaa44c39 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -14,6 +14,13 @@ static inline bool use_dma_iommu(struct device *dev) { return dev->dma_iommu; } +#else +static inline bool use_dma_iommu(struct device *dev) +{ + return false; +} +#endif /* CONFIG_IOMMU_DMA */ + dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); @@ -58,104 +65,5 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir); void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir); -#else -static inline bool use_dma_iommu(struct device *dev) -{ - return false; -} -static inline dma_addr_t iommu_dma_map_page(struct device *dev, - struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - return DMA_MAPPING_ERROR; -} -static inline void iommu_dma_unmap_page(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - return -EINVAL; -} -static inline void iommu_dma_unmap_sg(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline void *iommu_dma_alloc(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, unsigned long attrs) -{ - return NULL; -} -static inline int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - return -EINVAL; -} -static inline int iommu_dma_get_sgtable(struct device *dev, - struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, - size_t size, unsigned long attrs) -{ - return -EINVAL; -} -static inline unsigned long iommu_dma_get_merge_boundary(struct device *dev) -{ - return 0; -} -static inline size_t iommu_dma_opt_mapping_size(void) -{ - return 0; -} -static inline size_t iommu_dma_max_mapping_size(struct device *dev) -{ - return 0; -} -static inline void iommu_dma_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t handle, unsigned long attrs) -{ -} -static inline dma_addr_t iommu_dma_map_resource(struct device *dev, - phys_addr_t phys, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - return DMA_MAPPING_ERROR; -} -static inline void iommu_dma_unmap_resource(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline struct sg_table * -iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, - enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) -{ - return NULL; -} -static inline void iommu_dma_free_noncontiguous(struct device *dev, size_t size, - struct sg_table *sgt, enum dma_data_direction dir) -{ -} -static inline void iommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) -{ -} -static inline void iommu_dma_sync_single_for_device(struct 
device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ -} -static inline void iommu_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ -} -static inline void iommu_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ -} -#endif /* CONFIG_IOMMU_DMA */ + #endif /* _LINUX_IOMMU_DMA_H */ -- cgit v1.2.3 From d0dd066a0fa26d55c19ace9e89dedd9504c5bcba Mon Sep 17 00:00:00 2001 From: "Christoph Lameter (Ampere)" Date: Wed, 12 Jun 2024 09:49:56 -0700 Subject: seqcount: replace smp_rmb() in read_seqcount() with load acquire Many architectures support load acquire which can replace a memory barrier and save some cycles. A typical sequence do { seq = read_seqcount_begin(&s); } while (read_seqcount_retry(&s, seq)); requires 13 cycles on an N1 Neoverse arm64 core (Ampere Altra, to be specific) for an empty loop. Two read memory barriers are needed. One for each of the seqcount_* functions. We can replace the first read barrier with a load acquire of the seqcount which saves us one barrier. On the Altra doing so reduces the cycle count from 13 to 8. According to ARM, this is a general improvement for the ARM64 architecture and not specific to a certain processor. See https://developer.arm.com/documentation/102336/0100/Load-Acquire-and-Store-Release-instructions "Weaker ordering requirements that are imposed by Load-Acquire and Store-Release instructions allow for micro-architectural optimizations, which could reduce some of the performance impacts that are otherwise imposed by an explicit memory barrier. If the ordering requirement is satisfied using either a Load-Acquire or Store-Release, then it would be preferable to use these instructions instead of a DMB" [ NOTE! This is my original minimal patch that unconditionally switches over to using smp_load_acquire(), instead of the much more involved and subtle patch that Christoph Lameter wrote that made it conditional. But Christoph gets authorship credit because I had initially thought that we needed the more complex model, and Christoph ran with it and did the work. Only after looking at code generation for all the relevant architectures did I come to the conclusion that nobody actually really needs the old "smp_rmb()" model. Even architectures without load-acquire support generally do as well or better with smp_load_acquire(). So credit to Christoph, but if this then causes issues on other architectures, put the blame solidly on me. Also note that as part of the ruthless simplification, this gets rid of the overly subtle optimization where some code uses a non-barrier version of the sequence count (see the __read_seqcount_begin() users in fs/namei.c). They then play games with their own barriers and/or with nested sequence counts. Those optimizations are literally meaningless on x86, and questionable elsewhere. If somebody can show that they matter, we need to re-do them more cleanly than "use an internal helper".
- Linus ] Signed-off-by: Christoph Lameter (Ampere) Link: https://lore.kernel.org/all/20240912-seq_optimize-v3-1-8ee25e04dffa@gentwo.org/ Signed-off-by: Linus Torvalds --- include/linux/seqlock.h | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index d90d8ee29d81..fffeb754880f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -157,7 +157,7 @@ __seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ - unsigned seq = READ_ONCE(s->seqcount.sequence); \ + unsigned seq = smp_load_acquire(&s->seqcount.sequence); \ \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ return seq; \ @@ -170,7 +170,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ * Re-read the sequence counter since the (possibly \ * preempted) writer made progress. \ */ \ - seq = READ_ONCE(s->seqcount.sequence); \ + seq = smp_load_acquire(&s->seqcount.sequence); \ } \ \ return seq; \ @@ -208,7 +208,7 @@ static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) static inline unsigned __seqprop_sequence(const seqcount_t *s) { - return READ_ONCE(s->sequence); + return smp_load_acquire(&s->sequence); } static inline bool __seqprop_preemptible(const seqcount_t *s) @@ -263,17 +263,9 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) #define seqprop_assert(s) __seqprop(s, assert)(s) /** - * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier + * __read_seqcount_begin() - begin a seqcount_t read section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * - * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() - * barrier. Callers should ensure that smp_rmb() or equivalent ordering is - * provided before actually loading any of the variables that are to be - * protected in this critical section. - * - * Use carefully, only in critical code, and comment how the barrier is - * provided. 
- * * Return: count to be passed to read_seqcount_retry() */ #define __read_seqcount_begin(s) \ @@ -293,13 +285,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) * * Return: count to be passed to read_seqcount_retry() */ -#define raw_read_seqcount_begin(s) \ -({ \ - unsigned _seq = __read_seqcount_begin(s); \ - \ - smp_rmb(); \ - _seq; \ -}) +#define raw_read_seqcount_begin(s) __read_seqcount_begin(s) /** * read_seqcount_begin() - begin a seqcount_t read critical section @@ -328,7 +314,6 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) ({ \ unsigned __seq = seqprop_sequence(s); \ \ - smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ __seq; \ }) -- cgit v1.2.3 From ffcdc4c628e1a30489da10dd78358e89c823b341 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 6 Sep 2024 16:34:52 +0200 Subject: fs/mnt_idmapping: introduce an invalid_mnt_idmap Link: https://lore.kernel.org/linux-fsdevel/20240904-baugrube-erhoben-b3c1c49a2645@brauner/ Suggested-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- include/linux/mnt_idmapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index cd4d5c8781f5..b1b219bc3422 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -9,6 +9,7 @@ struct mnt_idmap; struct user_namespace; extern struct mnt_idmap nop_mnt_idmap; +extern struct mnt_idmap invalid_mnt_idmap; extern struct user_namespace init_user_ns; typedef struct { -- cgit v1.2.3 From c8770db2d54437a5f49417ae7b46f7de23d14db6 Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Tue, 10 Sep 2024 15:08:22 -0400 Subject: tcp: check skb is non-NULL in tcp_rto_delta_us() We have some machines running stock Ubuntu 20.04.6 (their 5.4.0-174-generic kernel) that run ceph and recently hit a NULL pointer dereference in tcp_rearm_rto(). We initially hit it from the TLP path, but later also saw it hit from the RACK case.
Here are examples of the oops messages we saw in each of those cases: Jul 26 15:05:02 rx [11061395.780353] BUG: kernel NULL pointer dereference, address: 0000000000000020 Jul 26 15:05:02 rx [11061395.787572] #PF: supervisor read access in kernel mode Jul 26 15:05:02 rx [11061395.792971] #PF: error_code(0x0000) - not-present page Jul 26 15:05:02 rx [11061395.798362] PGD 0 P4D 0 Jul 26 15:05:02 rx [11061395.801164] Oops: 0000 [#1] SMP NOPTI Jul 26 15:05:02 rx [11061395.805091] CPU: 0 PID: 9180 Comm: msgr-worker-1 Tainted: G W 5.4.0-174-generic #193-Ubuntu Jul 26 15:05:02 rx [11061395.814996] Hardware name: Supermicro SMC 2x26 os-gen8 64C NVME-Y 256G/H12SSW-NTR, BIOS 2.5.V1.2U.NVMe.UEFI 05/09/2023 Jul 26 15:05:02 rx [11061395.825952] RIP: 0010:tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061395.830656] Code: 87 ca 04 00 00 00 5b 41 5c 41 5d 5d c3 c3 49 8b bc 24 40 06 00 00 eb 8d 48 bb cf f7 53 e3 a5 9b c4 20 4c 89 ef e8 0c fe 0e 00 <48> 8b 78 20 48 c1 ef 03 48 89 f8 41 8b bc 24 80 04 00 00 48 f7 e3 Jul 26 15:05:02 rx [11061395.849665] RSP: 0018:ffffb75d40003e08 EFLAGS: 00010246 Jul 26 15:05:02 rx [11061395.855149] RAX: 0000000000000000 RBX: 20c49ba5e353f7cf RCX: 0000000000000000 Jul 26 15:05:02 rx [11061395.862542] RDX: 0000000062177c30 RSI: 000000000000231c RDI: ffff9874ad283a60 Jul 26 15:05:02 rx [11061395.869933] RBP: ffffb75d40003e20 R08: 0000000000000000 R09: ffff987605e20aa8 Jul 26 15:05:02 rx [11061395.877318] R10: ffffb75d40003f00 R11: ffffb75d4460f740 R12: ffff9874ad283900 Jul 26 15:05:02 rx [11061395.884710] R13: ffff9874ad283a60 R14: ffff9874ad283980 R15: ffff9874ad283d30 Jul 26 15:05:02 rx [11061395.892095] FS: 00007f1ef4a2e700(0000) GS:ffff987605e00000(0000) knlGS:0000000000000000 Jul 26 15:05:02 rx [11061395.900438] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Jul 26 15:05:02 rx [11061395.906435] CR2: 0000000000000020 CR3: 0000003e450ba003 CR4: 0000000000760ef0 Jul 26 15:05:02 rx [11061395.913822] PKRU: 55555554 Jul 26 15:05:02 rx [11061395.916786] Call Trace: Jul 26 15:05:02 rx [11061395.919488] Jul 26 15:05:02 rx [11061395.921765] ? show_regs.cold+0x1a/0x1f Jul 26 15:05:02 rx [11061395.925859] ? __die+0x90/0xd9 Jul 26 15:05:02 rx [11061395.929169] ? no_context+0x196/0x380 Jul 26 15:05:02 rx [11061395.933088] ? ip6_protocol_deliver_rcu+0x4e0/0x4e0 Jul 26 15:05:02 rx [11061395.938216] ? ip6_sublist_rcv_finish+0x3d/0x50 Jul 26 15:05:02 rx [11061395.943000] ? __bad_area_nosemaphore+0x50/0x1a0 Jul 26 15:05:02 rx [11061395.947873] ? bad_area_nosemaphore+0x16/0x20 Jul 26 15:05:02 rx [11061395.952486] ? do_user_addr_fault+0x267/0x450 Jul 26 15:05:02 rx [11061395.957104] ? ipv6_list_rcv+0x112/0x140 Jul 26 15:05:02 rx [11061395.961279] ? __do_page_fault+0x58/0x90 Jul 26 15:05:02 rx [11061395.965458] ? do_page_fault+0x2c/0xe0 Jul 26 15:05:02 rx [11061395.969465] ? page_fault+0x34/0x40 Jul 26 15:05:02 rx [11061395.973217] ? tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061395.977313] ? tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061395.981408] tcp_send_loss_probe+0x10b/0x220 Jul 26 15:05:02 rx [11061395.985937] tcp_write_timer_handler+0x1b4/0x240 Jul 26 15:05:02 rx [11061395.990809] tcp_write_timer+0x9e/0xe0 Jul 26 15:05:02 rx [11061395.994814] ? tcp_write_timer_handler+0x240/0x240 Jul 26 15:05:02 rx [11061395.999866] call_timer_fn+0x32/0x130 Jul 26 15:05:02 rx [11061396.003782] __run_timers.part.0+0x180/0x280 Jul 26 15:05:02 rx [11061396.008309] ? recalibrate_cpu_khz+0x10/0x10 Jul 26 15:05:02 rx [11061396.012841] ? 
native_x2apic_icr_write+0x30/0x30 Jul 26 15:05:02 rx [11061396.017718] ? lapic_next_event+0x21/0x30 Jul 26 15:05:02 rx [11061396.021984] ? clockevents_program_event+0x8f/0xe0 Jul 26 15:05:02 rx [11061396.027035] run_timer_softirq+0x2a/0x50 Jul 26 15:05:02 rx [11061396.031212] __do_softirq+0xd1/0x2c1 Jul 26 15:05:02 rx [11061396.035044] do_softirq_own_stack+0x2a/0x40 Jul 26 15:05:02 rx [11061396.039480] Jul 26 15:05:02 rx [11061396.041840] do_softirq.part.0+0x46/0x50 Jul 26 15:05:02 rx [11061396.046022] __local_bh_enable_ip+0x50/0x60 Jul 26 15:05:02 rx [11061396.050460] _raw_spin_unlock_bh+0x1e/0x20 Jul 26 15:05:02 rx [11061396.054817] nf_conntrack_tcp_packet+0x29e/0xbe0 [nf_conntrack] Jul 26 15:05:02 rx [11061396.060994] ? get_l4proto+0xe7/0x190 [nf_conntrack] Jul 26 15:05:02 rx [11061396.066220] nf_conntrack_in+0xe9/0x670 [nf_conntrack] Jul 26 15:05:02 rx [11061396.071618] ipv6_conntrack_local+0x14/0x20 [nf_conntrack] Jul 26 15:05:02 rx [11061396.077356] nf_hook_slow+0x45/0xb0 Jul 26 15:05:02 rx [11061396.081098] ip6_xmit+0x3f0/0x5d0 Jul 26 15:05:02 rx [11061396.084670] ? ipv6_anycast_cleanup+0x50/0x50 Jul 26 15:05:02 rx [11061396.089282] ? __sk_dst_check+0x38/0x70 Jul 26 15:05:02 rx [11061396.093381] ? inet6_csk_route_socket+0x13b/0x200 Jul 26 15:05:02 rx [11061396.098346] inet6_csk_xmit+0xa7/0xf0 Jul 26 15:05:02 rx [11061396.102263] __tcp_transmit_skb+0x550/0xb30 Jul 26 15:05:02 rx [11061396.106701] tcp_write_xmit+0x3c6/0xc20 Jul 26 15:05:02 rx [11061396.110792] ? __alloc_skb+0x98/0x1d0 Jul 26 15:05:02 rx [11061396.114708] __tcp_push_pending_frames+0x37/0x100 Jul 26 15:05:02 rx [11061396.119667] tcp_push+0xfd/0x100 Jul 26 15:05:02 rx [11061396.123150] tcp_sendmsg_locked+0xc70/0xdd0 Jul 26 15:05:02 rx [11061396.127588] tcp_sendmsg+0x2d/0x50 Jul 26 15:05:02 rx [11061396.131245] inet6_sendmsg+0x43/0x70 Jul 26 15:05:02 rx [11061396.135075] __sock_sendmsg+0x48/0x70 Jul 26 15:05:02 rx [11061396.138994] ____sys_sendmsg+0x212/0x280 Jul 26 15:05:02 rx [11061396.143172] ___sys_sendmsg+0x88/0xd0 Jul 26 15:05:02 rx [11061396.147098] ? __seccomp_filter+0x7e/0x6b0 Jul 26 15:05:02 rx [11061396.151446] ? __switch_to+0x39c/0x460 Jul 26 15:05:02 rx [11061396.155453] ? __switch_to_asm+0x42/0x80 Jul 26 15:05:02 rx [11061396.159636] ? 
__switch_to_asm+0x5a/0x80 Jul 26 15:05:02 rx [11061396.163816] __sys_sendmsg+0x5c/0xa0 Jul 26 15:05:02 rx [11061396.167647] __x64_sys_sendmsg+0x1f/0x30 Jul 26 15:05:02 rx [11061396.171832] do_syscall_64+0x57/0x190 Jul 26 15:05:02 rx [11061396.175748] entry_SYSCALL_64_after_hwframe+0x5c/0xc1 Jul 26 15:05:02 rx [11061396.181055] RIP: 0033:0x7f1ef692618d Jul 26 15:05:02 rx [11061396.184893] Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 ca ee ff ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 2f 44 89 c7 48 89 44 24 08 e8 fe ee ff ff 48 Jul 26 15:05:02 rx [11061396.203889] RSP: 002b:00007f1ef4a26aa0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e Jul 26 15:05:02 rx [11061396.211708] RAX: ffffffffffffffda RBX: 000000000000084b RCX: 00007f1ef692618d Jul 26 15:05:02 rx [11061396.219091] RDX: 0000000000004000 RSI: 00007f1ef4a26b10 RDI: 0000000000000275 Jul 26 15:05:02 rx [11061396.226475] RBP: 0000000000004000 R08: 0000000000000000 R09: 0000000000000020 Jul 26 15:05:02 rx [11061396.233859] R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000084b Jul 26 15:05:02 rx [11061396.241243] R13: 00007f1ef4a26b10 R14: 0000000000000275 R15: 000055592030f1e8 Jul 26 15:05:02 rx [11061396.248628] Modules linked in: vrf bridge stp llc vxlan ip6_udp_tunnel udp_tunnel nls_iso8859_1 amd64_edac_mod edac_mce_amd kvm_amd kvm crct10dif_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper wmi_bmof ipmi_ssif input_leds joydev rndis_host cdc_ether usbnet mii ast drm_vram_helper ttm drm_kms_helper i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt ccp mac_hid ipmi_si ipmi_devintf ipmi_msghandler nft_ct sch_fq_codel nf_tables_set nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink ramoops reed_solomon efi_pstore drm ip_tables x_tables autofs4 raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid0 multipath linear mlx5_ib ib_uverbs ib_core raid1 mlx5_core hid_generic pci_hyperv_intf crc32_pclmul tls usbhid ahci mlxfw bnxt_en libahci hid nvme i2c_piix4 nvme_core wmi Jul 26 15:05:02 rx [11061396.324334] CR2: 0000000000000020 Jul 26 15:05:02 rx [11061396.327944] ---[ end trace 68a2b679d1cfb4f1 ]--- Jul 26 15:05:02 rx [11061396.433435] RIP: 0010:tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061396.438137] Code: 87 ca 04 00 00 00 5b 41 5c 41 5d 5d c3 c3 49 8b bc 24 40 06 00 00 eb 8d 48 bb cf f7 53 e3 a5 9b c4 20 4c 89 ef e8 0c fe 0e 00 <48> 8b 78 20 48 c1 ef 03 48 89 f8 41 8b bc 24 80 04 00 00 48 f7 e3 Jul 26 15:05:02 rx [11061396.457144] RSP: 0018:ffffb75d40003e08 EFLAGS: 00010246 Jul 26 15:05:02 rx [11061396.462629] RAX: 0000000000000000 RBX: 20c49ba5e353f7cf RCX: 0000000000000000 Jul 26 15:05:02 rx [11061396.470012] RDX: 0000000062177c30 RSI: 000000000000231c RDI: ffff9874ad283a60 Jul 26 15:05:02 rx [11061396.477396] RBP: ffffb75d40003e20 R08: 0000000000000000 R09: ffff987605e20aa8 Jul 26 15:05:02 rx [11061396.484779] R10: ffffb75d40003f00 R11: ffffb75d4460f740 R12: ffff9874ad283900 Jul 26 15:05:02 rx [11061396.492164] R13: ffff9874ad283a60 R14: ffff9874ad283980 R15: ffff9874ad283d30 Jul 26 15:05:02 rx [11061396.499547] FS: 00007f1ef4a2e700(0000) GS:ffff987605e00000(0000) knlGS:0000000000000000 Jul 26 15:05:02 rx [11061396.507886] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Jul 26 15:05:02 rx [11061396.513884] CR2: 0000000000000020 CR3: 0000003e450ba003 CR4: 0000000000760ef0 Jul 26 15:05:02 rx [11061396.521267] PKRU: 55555554 Jul 26 15:05:02 rx [11061396.524230] Kernel panic - not 
syncing: Fatal exception in interrupt Jul 26 15:05:02 rx [11061396.530885] Kernel Offset: 0x1b200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Jul 26 15:05:03 rx [11061396.660181] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- After we hit this we disabled TLP by setting tcp_early_retrans to 0 and then hit the crash in the RACK case: Aug 7 07:26:16 rx [1006006.265582] BUG: kernel NULL pointer dereference, address: 0000000000000020 Aug 7 07:26:16 rx [1006006.272719] #PF: supervisor read access in kernel mode Aug 7 07:26:16 rx [1006006.278030] #PF: error_code(0x0000) - not-present page Aug 7 07:26:16 rx [1006006.283343] PGD 0 P4D 0 Aug 7 07:26:16 rx [1006006.286057] Oops: 0000 [#1] SMP NOPTI Aug 7 07:26:16 rx [1006006.289896] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.4.0-174-generic #193-Ubuntu Aug 7 07:26:16 rx [1006006.299107] Hardware name: Supermicro SMC 2x26 os-gen8 64C NVME-Y 256G/H12SSW-NTR, BIOS 2.5.V1.2U.NVMe.UEFI 05/09/2023 Aug 7 07:26:16 rx [1006006.309970] RIP: 0010:tcp_rearm_rto+0xe4/0x160 Aug 7 07:26:16 rx [1006006.314584] Code: 87 ca 04 00 00 00 5b 41 5c 41 5d 5d c3 c3 49 8b bc 24 40 06 00 00 eb 8d 48 bb cf f7 53 e3 a5 9b c4 20 4c 89 ef e8 0c fe 0e 00 <48> 8b 78 20 48 c1 ef 03 48 89 f8 41 8b bc 24 80 04 00 00 48 f7 e3 Aug 7 07:26:16 rx [1006006.333499] RSP: 0018:ffffb42600a50960 EFLAGS: 00010246 Aug 7 07:26:16 rx [1006006.338895] RAX: 0000000000000000 RBX: 20c49ba5e353f7cf RCX: 0000000000000000 Aug 7 07:26:16 rx [1006006.346193] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff92d687ed8160 Aug 7 07:26:16 rx [1006006.353489] RBP: ffffb42600a50978 R08: 0000000000000000 R09: 00000000cd896dcc Aug 7 07:26:16 rx [1006006.360786] R10: ffff92dc3404f400 R11: 0000000000000001 R12: ffff92d687ed8000 Aug 7 07:26:16 rx [1006006.368084] R13: ffff92d687ed8160 R14: 00000000cd896dcc R15: 00000000cd8fca81 Aug 7 07:26:16 rx [1006006.375381] FS: 0000000000000000(0000) GS:ffff93158ad40000(0000) knlGS:0000000000000000 Aug 7 07:26:16 rx [1006006.383632] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Aug 7 07:26:16 rx [1006006.389544] CR2: 0000000000000020 CR3: 0000003e775ce006 CR4: 0000000000760ee0 Aug 7 07:26:16 rx [1006006.396839] PKRU: 55555554 Aug 7 07:26:16 rx [1006006.399717] Call Trace: Aug 7 07:26:16 rx [1006006.402335] Aug 7 07:26:16 rx [1006006.404525] ? show_regs.cold+0x1a/0x1f Aug 7 07:26:16 rx [1006006.408532] ? __die+0x90/0xd9 Aug 7 07:26:16 rx [1006006.411760] ? no_context+0x196/0x380 Aug 7 07:26:16 rx [1006006.415599] ? __bad_area_nosemaphore+0x50/0x1a0 Aug 7 07:26:16 rx [1006006.420392] ? _raw_spin_lock+0x1e/0x30 Aug 7 07:26:16 rx [1006006.424401] ? bad_area_nosemaphore+0x16/0x20 Aug 7 07:26:16 rx [1006006.428927] ? do_user_addr_fault+0x267/0x450 Aug 7 07:26:16 rx [1006006.433450] ? __do_page_fault+0x58/0x90 Aug 7 07:26:16 rx [1006006.437542] ? do_page_fault+0x2c/0xe0 Aug 7 07:26:16 rx [1006006.441470] ? page_fault+0x34/0x40 Aug 7 07:26:16 rx [1006006.445134] ? tcp_rearm_rto+0xe4/0x160 Aug 7 07:26:16 rx [1006006.449145] tcp_ack+0xa32/0xb30 Aug 7 07:26:16 rx [1006006.452542] tcp_rcv_established+0x13c/0x670 Aug 7 07:26:16 rx [1006006.456981] ? sk_filter_trim_cap+0x48/0x220 Aug 7 07:26:16 rx [1006006.461419] tcp_v6_do_rcv+0xdb/0x450 Aug 7 07:26:16 rx [1006006.465257] tcp_v6_rcv+0xc2b/0xd10 Aug 7 07:26:16 rx [1006006.468918] ip6_protocol_deliver_rcu+0xd3/0x4e0 Aug 7 07:26:16 rx [1006006.473706] ip6_input_finish+0x15/0x20 Aug 7 07:26:16 rx [1006006.477710] ip6_input+0xa2/0xb0 Aug 7 07:26:16 rx [1006006.481109] ? 
ip6_protocol_deliver_rcu+0x4e0/0x4e0 Aug 7 07:26:16 rx [1006006.486151] ip6_sublist_rcv_finish+0x3d/0x50 Aug 7 07:26:16 rx [1006006.490679] ip6_sublist_rcv+0x1aa/0x250 Aug 7 07:26:16 rx [1006006.494779] ? ip6_rcv_finish_core.isra.0+0xa0/0xa0 Aug 7 07:26:16 rx [1006006.499828] ipv6_list_rcv+0x112/0x140 Aug 7 07:26:16 rx [1006006.503748] __netif_receive_skb_list_core+0x1a4/0x250 Aug 7 07:26:16 rx [1006006.509057] netif_receive_skb_list_internal+0x1a1/0x2b0 Aug 7 07:26:16 rx [1006006.514538] gro_normal_list.part.0+0x1e/0x40 Aug 7 07:26:16 rx [1006006.519068] napi_complete_done+0x91/0x130 Aug 7 07:26:16 rx [1006006.523352] mlx5e_napi_poll+0x18e/0x610 [mlx5_core] Aug 7 07:26:16 rx [1006006.528481] net_rx_action+0x142/0x390 Aug 7 07:26:16 rx [1006006.532398] __do_softirq+0xd1/0x2c1 Aug 7 07:26:16 rx [1006006.536142] irq_exit+0xae/0xb0 Aug 7 07:26:16 rx [1006006.539452] do_IRQ+0x5a/0xf0 Aug 7 07:26:16 rx [1006006.542590] common_interrupt+0xf/0xf Aug 7 07:26:16 rx [1006006.546421] Aug 7 07:26:16 rx [1006006.548695] RIP: 0010:native_safe_halt+0xe/0x10 Aug 7 07:26:16 rx [1006006.553399] Code: 7b ff ff ff eb bd 90 90 90 90 90 90 e9 07 00 00 00 0f 00 2d 36 2c 50 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 26 2c 50 00 fb f4 90 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 e8 dd 5e 61 ff 65 Aug 7 07:26:16 rx [1006006.572309] RSP: 0018:ffffb42600177e70 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffc2 Aug 7 07:26:16 rx [1006006.580040] RAX: ffffffff8ed08b20 RBX: 0000000000000005 RCX: 0000000000000001 Aug 7 07:26:16 rx [1006006.587337] RDX: 00000000f48eeca2 RSI: 0000000000000082 RDI: 0000000000000082 Aug 7 07:26:16 rx [1006006.594635] RBP: ffffb42600177e90 R08: 0000000000000000 R09: 000000000000020f Aug 7 07:26:16 rx [1006006.601931] R10: 0000000000100000 R11: 0000000000000000 R12: 0000000000000005 Aug 7 07:26:16 rx [1006006.609229] R13: ffff93157deb5f00 R14: 0000000000000000 R15: 0000000000000000 Aug 7 07:26:16 rx [1006006.616530] ? __cpuidle_text_start+0x8/0x8 Aug 7 07:26:16 rx [1006006.620886] ? 
default_idle+0x20/0x140 Aug 7 07:26:16 rx [1006006.624804] arch_cpu_idle+0x15/0x20 Aug 7 07:26:16 rx [1006006.628545] default_idle_call+0x23/0x30 Aug 7 07:26:16 rx [1006006.632640] do_idle+0x1fb/0x270 Aug 7 07:26:16 rx [1006006.636035] cpu_startup_entry+0x20/0x30 Aug 7 07:26:16 rx [1006006.640126] start_secondary+0x178/0x1d0 Aug 7 07:26:16 rx [1006006.644218] secondary_startup_64+0xa4/0xb0 Aug 7 07:26:17 rx [1006006.648568] Modules linked in: vrf bridge stp llc vxlan ip6_udp_tunnel udp_tunnel nls_iso8859_1 nft_ct amd64_edac_mod edac_mce_amd kvm_amd kvm crct10dif_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper wmi_bmof ipmi_ssif input_leds joydev rndis_host cdc_ether usbnet ast mii drm_vram_helper ttm drm_kms_helper i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt ccp mac_hid ipmi_si ipmi_devintf ipmi_msghandler sch_fq_codel nf_tables_set nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink ramoops reed_solomon efi_pstore drm ip_tables x_tables autofs4 raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid0 multipath linear mlx5_ib ib_uverbs ib_core raid1 hid_generic mlx5_core pci_hyperv_intf crc32_pclmul usbhid ahci tls mlxfw bnxt_en hid libahci nvme i2c_piix4 nvme_core wmi [last unloaded: cpuid] Aug 7 07:26:17 rx [1006006.726180] CR2: 0000000000000020 Aug 7 07:26:17 rx [1006006.729718] ---[ end trace e0e2e37e4e612984 ]--- Prior to seeing the first crash and on other machines we also see the warning in tcp_send_loss_probe() where packets_out is non-zero, but both transmit and retrans queues are empty so we know the box is seeing some accounting issue in this area: Jul 26 09:15:27 kernel: ------------[ cut here ]------------ Jul 26 09:15:27 kernel: invalid inflight: 2 state 1 cwnd 68 mss 8988 Jul 26 09:15:27 kernel: WARNING: CPU: 16 PID: 0 at net/ipv4/tcp_output.c:2605 tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: Modules linked in: vrf bridge stp llc vxlan ip6_udp_tunnel udp_tunnel nls_iso8859_1 nft_ct amd64_edac_mod edac_mce_amd kvm_amd kvm crct10dif_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper wmi_bmof ipmi_ssif joydev input_leds rndis_host cdc_ether usbnet mii ast drm_vram_helper ttm drm_kms_he> Jul 26 09:15:27 kernel: CPU: 16 PID: 0 Comm: swapper/16 Not tainted 5.4.0-174-generic #193-Ubuntu Jul 26 09:15:27 kernel: Hardware name: Supermicro SMC 2x26 os-gen8 64C NVME-Y 256G/H12SSW-NTR, BIOS 2.5.V1.2U.NVMe.UEFI 05/09/2023 Jul 26 09:15:27 kernel: RIP: 0010:tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: Code: 08 26 01 00 75 e2 41 0f b6 54 24 12 41 8b 8c 24 c0 06 00 00 45 89 f0 48 c7 c7 e0 b4 20 a7 c6 05 8d 08 26 01 01 e8 4a c0 0f 00 <0f> 0b eb ba 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 Jul 26 09:15:27 kernel: RSP: 0018:ffffb7838088ce00 EFLAGS: 00010286 Jul 26 09:15:27 kernel: RAX: 0000000000000000 RBX: ffff9b84b5630430 RCX: 0000000000000006 Jul 26 09:15:27 kernel: RDX: 0000000000000007 RSI: 0000000000000096 RDI: ffff9b8e4621c8c0 Jul 26 09:15:27 kernel: RBP: ffffb7838088ce18 R08: 0000000000000927 R09: 0000000000000004 Jul 26 09:15:27 kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff9b84b5630000 Jul 26 09:15:27 kernel: R13: 0000000000000000 R14: 000000000000231c R15: ffff9b84b5630430 Jul 26 09:15:27 kernel: FS: 0000000000000000(0000) GS:ffff9b8e46200000(0000) knlGS:0000000000000000 Jul 26 09:15:27 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Jul 26 09:15:27 kernel: CR2: 000056238cec2380 CR3: 0000003e49ede005 CR4: 
0000000000760ee0 Jul 26 09:15:27 kernel: PKRU: 55555554 Jul 26 09:15:27 kernel: Call Trace: Jul 26 09:15:27 kernel: Jul 26 09:15:27 kernel: ? show_regs.cold+0x1a/0x1f Jul 26 09:15:27 kernel: ? __warn+0x98/0xe0 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: ? report_bug+0xd1/0x100 Jul 26 09:15:27 kernel: ? do_error_trap+0x9b/0xc0 Jul 26 09:15:27 kernel: ? do_invalid_op+0x3c/0x50 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: ? invalid_op+0x1e/0x30 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: tcp_write_timer_handler+0x1b4/0x240 Jul 26 09:15:27 kernel: tcp_write_timer+0x9e/0xe0 Jul 26 09:15:27 kernel: ? tcp_write_timer_handler+0x240/0x240 Jul 26 09:15:27 kernel: call_timer_fn+0x32/0x130 Jul 26 09:15:27 kernel: __run_timers.part.0+0x180/0x280 Jul 26 09:15:27 kernel: ? timerqueue_add+0x9b/0xb0 Jul 26 09:15:27 kernel: ? enqueue_hrtimer+0x3d/0x90 Jul 26 09:15:27 kernel: ? do_error_trap+0x9b/0xc0 Jul 26 09:15:27 kernel: ? do_invalid_op+0x3c/0x50 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: ? invalid_op+0x1e/0x30 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: tcp_write_timer_handler+0x1b4/0x240 Jul 26 09:15:27 kernel: tcp_write_timer+0x9e/0xe0 Jul 26 09:15:27 kernel: ? tcp_write_timer_handler+0x240/0x240 Jul 26 09:15:27 kernel: call_timer_fn+0x32/0x130 Jul 26 09:15:27 kernel: __run_timers.part.0+0x180/0x280 Jul 26 09:15:27 kernel: ? timerqueue_add+0x9b/0xb0 Jul 26 09:15:27 kernel: ? enqueue_hrtimer+0x3d/0x90 Jul 26 09:15:27 kernel: ? recalibrate_cpu_khz+0x10/0x10 Jul 26 09:15:27 kernel: ? ktime_get+0x3e/0xa0 Jul 26 09:15:27 kernel: ? native_x2apic_icr_write+0x30/0x30 Jul 26 09:15:27 kernel: run_timer_softirq+0x2a/0x50 Jul 26 09:15:27 kernel: __do_softirq+0xd1/0x2c1 Jul 26 09:15:27 kernel: irq_exit+0xae/0xb0 Jul 26 09:15:27 kernel: smp_apic_timer_interrupt+0x7b/0x140 Jul 26 09:15:27 kernel: apic_timer_interrupt+0xf/0x20 Jul 26 09:15:27 kernel: Jul 26 09:15:27 kernel: RIP: 0010:native_safe_halt+0xe/0x10 Jul 26 09:15:27 kernel: Code: 7b ff ff ff eb bd 90 90 90 90 90 90 e9 07 00 00 00 0f 00 2d 36 2c 50 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 26 2c 50 00 fb f4 90 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 e8 dd 5e 61 ff 65 Jul 26 09:15:27 kernel: RSP: 0018:ffffb783801cfe70 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 Jul 26 09:15:27 kernel: RAX: ffffffffa6908b20 RBX: 0000000000000010 RCX: 0000000000000001 Jul 26 09:15:27 kernel: RDX: 000000006fc0c97e RSI: 0000000000000082 RDI: 0000000000000082 Jul 26 09:15:27 kernel: RBP: ffffb783801cfe90 R08: 0000000000000000 R09: 0000000000000225 Jul 26 09:15:27 kernel: R10: 0000000000100000 R11: 0000000000000000 R12: 0000000000000010 Jul 26 09:15:27 kernel: R13: ffff9b8e390b0000 R14: 0000000000000000 R15: 0000000000000000 Jul 26 09:15:27 kernel: ? __cpuidle_text_start+0x8/0x8 Jul 26 09:15:27 kernel: ? default_idle+0x20/0x140 Jul 26 09:15:27 kernel: arch_cpu_idle+0x15/0x20 Jul 26 09:15:27 kernel: default_idle_call+0x23/0x30 Jul 26 09:15:27 kernel: do_idle+0x1fb/0x270 Jul 26 09:15:27 kernel: cpu_startup_entry+0x20/0x30 Jul 26 09:15:27 kernel: start_secondary+0x178/0x1d0 Jul 26 09:15:27 kernel: secondary_startup_64+0xa4/0xb0 Jul 26 09:15:27 kernel: ---[ end trace e7ac822987e33be1 ]--- The NULL ptr deref is coming from tcp_rto_delta_us() attempting to pull an skb off the head of the retransmit queue and then dereferencing that skb to get the skb_mstamp_ns value via tcp_skb_timestamp_us(skb). 
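For reference, a minimal sketch of the helper as it stood before this change, paraphrased from the hunk below and annotated with where the dereference happens (illustrative only):

  static inline s64 tcp_rto_delta_us(const struct sock *sk)
  {
          const struct sk_buff *skb = tcp_rtx_queue_head(sk);  /* may be NULL if accounting is off */
          u32 rto = inet_csk(sk)->icsk_rto;
          /* tcp_skb_timestamp_us(skb) reads skb->skb_mstamp_ns: NULL deref when skb is NULL */
          u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);

          return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
  }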
The crash is the same one that was reported a number of years ago here: https://lore.kernel.org/netdev/86c0f836-9a7c-438b-d81a-839be45f1f58@gmail.com/T/#t and the kernel we're running has the fix which was added to resolve this issue. Unfortunately we've been unsuccessful so far in reproducing this problem in the lab and do not have the luxury of pushing out a new kernel to try and test if newer kernels resolve this issue at the moment. I realize this is a report against both an Ubuntu kernel and also an older 5.4 kernel. I have reported this issue to Ubuntu here: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2077657 However, since this issue has possibly cropped up again, I feel it makes sense to build in some protection in this path (even on the latest kernel versions), since the code in question just blindly assumes there's a valid skb without testing whether it's NULL before it looks at the timestamp. Given that we have seen crashes in this path before, and now this case, it seems we should protect ourselves against incorrect packets_out accounting. While we should fix that root cause, we should also just make sure the skb is not NULL before dereferencing it. Also add a WARN_ONCE() here to capture some information if/when the problem case is hit again. Fixes: e1a10ef7fa87 ("tcp: introduce tcp_rto_delta_us() helper for xmit timer fix") Signed-off-by: Josh Hunt Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f77f812bfbe7..d1948d357dad 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2435,9 +2435,26 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk) { const struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 rto = inet_csk(sk)->icsk_rto; - u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto); - return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; + if (likely(skb)) { + u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto); + + return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; + } else { + WARN_ONCE(1, + "rtx queue emtpy: " + "out:%u sacked:%u lost:%u retrans:%u " + "tlp_high_seq:%u sk_state:%u ca_state:%u " + "advmss:%u mss_cache:%u pmtu:%u\n", + tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out, + tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out, + tcp_sk(sk)->tlp_high_seq, sk->sk_state, + inet_csk(sk)->icsk_ca_state, + tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache, + inet_csk(sk)->icsk_pmtu_cookie); + return jiffies_to_usecs(rto); + } + } /* -- cgit v1.2.3 From f8eb5bd9a818cc5f2a1e50b22b0091830b28cc36 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Sep 2024 08:58:31 -0700 Subject: mm: fix build on 32-bit targets without MAX_PHYSMEM_BITS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merge resolution to deal with the conflict between commits ea72ce5da228 ("x86/kaslr: Expose and use the end of the physical memory address space") and 99185c10d5d9 ("resource, kunit: add test case for region_intersects()") ended up being broken in configurations that didn't define a MAX_PHYSMEM_BITS and that had a 32-bit 'phys_addr_t'.
The fallback to using all bits set (i.e. "(-1ULL)") ended up causing a build error: kernel/resource.c: In function ‘gfr_start’: include/linux/minmax.h:93:30: error: conversion from ‘long long unsigned int’ to ‘resource_size_t’ {aka ‘unsigned int’} changes value from ‘18446744073709551615’ to ‘4294967295’ [-Werror=overflow] This was reported by Geert for m68k, but he points out that it happens on other 32-bit architectures too, e.g. mips, xtensa, parisc, and powerpc. Limiting 'PHYSMEM_END' to a 'phys_addr_t' (which is the same as 'resource_size_t') fixes the build, but Geert points out that it will then cause a silent overflow in mm/sparse.c: unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT; so we actually do want PHYSMEM_END to be defined as a 64-bit type - just not all ones, and not larger than 'phys_addr_t'. The proper fix is probably to not have some kind of default fallback at all, but just make sure every architecture has a valid MAX_PHYSMEM_BITS. But in the meantime, this just applies the rule that PHYSMEM_END is the largest value that fits in a 'phys_addr_t', but does not have the high bit set in 64 bits. Ugly, ugly. Reported-by: Geert Uytterhoeven Cc: Andrew Morton Cc: Huang Ying Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b62437447077..ecf63d2b0582 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -101,7 +101,7 @@ extern int mmap_rnd_compat_bits __read_mostly; # ifdef MAX_PHYSMEM_BITS # define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) # else -# define PHYSMEM_END (-1ULL) +# define PHYSMEM_END (((phys_addr_t)-1)&~(1ULL<<63)) # endif #endif -- cgit v1.2.3 From d98f72272500f505cd7e152ffa456e64ee3855f0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Sep 2024 12:32:03 +1000 Subject: nfs: simplify and guarantee owner uniqueness. I have evidence of a Linux NFS client getting NFS4ERR_BAD_SEQID to a v4.0 LOCK request to a Linux server (which had the RELEASE_LOCKOWNER bug fixed). The LOCK request presented a "new" lock owner so there are two seq ids in the request: that for the open file, and that for the new lock. Given the context I am confident that the new lock owner was reported to have the wrong seqid. As lock owner identifiers are reused, the server must still have a lock owner active which the client thinks is no longer active. I wasn't able to determine a root cause, but the simplest fix seems to be to ensure lock owners are always unique, much as open owners are (thanks to a time stamp). The easiest way to ensure uniqueness is with a 64-bit counter for each server. That will never cycle: even updated once a nanosecond, it would take 584 years to wrap, a single NFS server would not handle open/lock requests nearly that fast, and a Linux node is unlikely to have an uptime approaching that. This patch removes the two IDAs and instead uses a per-server atomic64_t to provide uniqueness. Note that the lock owner already encodes the id as 64 bits even though it is a 32-bit value, so changing to a 64-bit value does not change the encoding of the lock owner. The open owner encoding is now 4 bytes larger.
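A minimal sketch of the allocation side this implies; the fs/nfs state-management change itself is not part of the include/ diff below, so the helper name here is illustrative:

  static u64 nfs4_get_unique_owner_id(struct nfs_server *server)
  {
          /* 64-bit counter: effectively never wraps, so ids are never reused
           * and the separate openowner/lockowner IDAs are no longer needed */
          return atomic64_inc_return(&server->owner_ctr);
  }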
Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 3 +-- include/linux/nfs_xdr.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1df86ab98c77..e1e47ebd83ef 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -234,8 +234,7 @@ struct nfs_server { /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; #endif - struct ida openowner_id; - struct ida lockowner_id; + atomic64_t owner_ctr; struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 45623af3e7b8..96ba04ab24f3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -446,7 +446,7 @@ struct nfs42_clone_res { struct stateowner_id { __u64 create_time; - __u32 uniquifier; + __u64 uniquifier; }; struct nfs4_open_delegation { -- cgit v1.2.3 From 0b108e83795c9c23101f584ef7e3ab4f1f120ef0 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 19 Aug 2024 08:58:59 -0700 Subject: SUNRPC: convert RPC_TASK_* constants to enum The RPC_TASK_* constants are defined as macros, which means that most kernel builds will not contain their definitions in the debuginfo. However, it's quite useful for debuggers to be able to view the task state constant and interpret it correctly. Conversion to an enum will ensure the constants are present in debuginfo and can be interpreted by debuggers without needing to hard-code them and track their changes. Signed-off-by: Stephen Brennan Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 0c77ba488bba..fec1e8a1570c 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -151,13 +151,15 @@ struct rpc_task_setup { #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) #define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE) -#define RPC_TASK_RUNNING 0 -#define RPC_TASK_QUEUED 1 -#define RPC_TASK_ACTIVE 2 -#define RPC_TASK_NEED_XMIT 3 -#define RPC_TASK_NEED_RECV 4 -#define RPC_TASK_MSG_PIN_WAIT 5 -#define RPC_TASK_SIGNALLED 6 +enum { + RPC_TASK_RUNNING, + RPC_TASK_QUEUED, + RPC_TASK_ACTIVE, + RPC_TASK_NEED_XMIT, + RPC_TASK_NEED_RECV, + RPC_TASK_MSG_PIN_WAIT, + RPC_TASK_SIGNALLED, +}; #define rpc_test_and_set_running(t) \ test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -- cgit v1.2.3 From dfb07e990a0d019d7ae9b78dd4260620ce32e79a Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Wed, 24 Jul 2024 14:07:12 +0300 Subject: nfs: add 'noalignwrite' option for lock-less 'lost writes' prevention There are some applications that write to predefined non-overlapping file offsets from multiple clients and therefore don't need to rely on file locking. However, if these applications want non-aligned offsets and sizes, they need to either use locks or risk data corruption, as the NFS client defaults to extending writes to whole pages. This commit adds a new mount option `noalignwrite`, which allows turning that off and avoids the need for locking, as long as these applications don't overlap on offsets.
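As a rough illustration of the client-side check this option implies, only the NFS_MOUNT_NO_ALIGNWRITE bit below comes from this patch; the helper and its use are assumptions, not the actual fs/nfs write-path code:

  /* hypothetical helper: skip the page-extension behaviour when the
   * 'noalignwrite' mount option set NFS_MOUNT_NO_ALIGNWRITE */
  static bool nfs_may_extend_write(const struct nfs_server *server)
  {
          return !(server->flags & NFS_MOUNT_NO_ALIGNWRITE);
  }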
Signed-off-by: Dan Aloni Reviewed-by: Jeff Layton Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e1e47ebd83ef..c49bfdded5c1 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -158,6 +158,7 @@ struct nfs_server { #define NFS_MOUNT_WRITE_WAIT 0x02000000 #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 #define NFS_MOUNT_SHUTDOWN 0x08000000 +#define NFS_MOUNT_NO_ALIGNWRITE 0x10000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From 4806ded4c14c5e8fdc6ce885d83221a78c06a428 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:35 -0400 Subject: nfs_common: factor out nfs_errtbl and nfs_stat_to_errno Common nfs_stat_to_errno() is used by both fs/nfs/nfs2xdr.c and fs/nfs/nfs3xdr.c. It will also be used by fs/nfsd/localio.c. Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/nfs_common.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/nfs_common.h (limited to 'include') diff --git a/include/linux/nfs_common.h b/include/linux/nfs_common.h new file mode 100644 index 000000000000..3395c4a4d372 --- /dev/null +++ b/include/linux/nfs_common.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file contains constants and methods used by both NFS client and server. + */ +#ifndef _LINUX_NFS_COMMON_H +#define _LINUX_NFS_COMMON_H + +#include +#include + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +int nfs_stat_to_errno(enum nfs_stat status); + +#endif /* _LINUX_NFS_COMMON_H */ -- cgit v1.2.3 From 1fcb16674e37e434efe68ec3e142229f35b6b9e1 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:36 -0400 Subject: nfs_common: factor out nfs4_errtbl and nfs4_stat_to_errno Common nfs4_stat_to_errno() is used by fs/nfs/nfs4xdr.c and will also be used by fs/nfs/localio.c. Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/nfs_common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_common.h b/include/linux/nfs_common.h index 3395c4a4d372..5fc02df88252 100644 --- a/include/linux/nfs_common.h +++ b/include/linux/nfs_common.h @@ -12,5 +12,6 @@ #define errno_NFSERR_IO EIO int nfs_stat_to_errno(enum nfs_stat status); +int nfs4_stat_to_errno(int stat); #endif /* _LINUX_NFS_COMMON_H */ -- cgit v1.2.3 From 1545e488b1f908b10f6dff0c278c6b7a37122de8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:37 -0400 Subject: nfs: factor out {encode,decode}_opaque_fixed to nfs_xdr.h This eliminates duplicate functions in various files and allows for additional callers.
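As an illustration of the intended callers: a stateid is a typical fixed-size opaque object, so a user of the helper shown in the hunk that follows might look like the sketch below (the wrapper itself is illustrative, not part of this patch):

  /* illustrative wrapper: encode a stateid using the shared helper */
  static void encode_nfs4_stateid(struct xdr_stream *xdr,
                                  const nfs4_stateid *stateid)
  {
          encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
  }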
Signed-off-by: Mike Snitzer Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 96ba04ab24f3..12d8e47bc5a3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1853,6 +1853,24 @@ struct nfs_rpc_ops { void (*disable_swap)(struct inode *inode); }; +/* + * Helper functions used by NFS client and/or server + */ +static inline void encode_opaque_fixed(struct xdr_stream *xdr, + const void *buf, size_t len) +{ + WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); +} + +static inline int decode_opaque_fixed(struct xdr_stream *xdr, + void *buf, size_t len) +{ + ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len); + if (unlikely(ret < 0)) + return -EIO; + return 0; +} + /* * Function vectors etc. for the NFS client */ @@ -1866,4 +1884,4 @@ extern const struct rpc_version nfs_version4; extern const struct rpc_version nfsacl_version3; extern const struct rpc_program nfsacl_program; -#endif +#endif /* _LINUX_NFS_XDR_H */ -- cgit v1.2.3 From 199f2128741077087a2ab33889a6868830465033 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 5 Sep 2024 15:09:46 -0400 Subject: SUNRPC: add svcauth_map_clnt_to_svc_cred_local Add a new function, svcauth_map_clnt_to_svc_cred_local(), which maps a generic cred to a svc_cred suitable for use in nfsd. This is needed by the localio code to map nfs client creds to nfs server credentials. Following from net/sunrpc/auth_unix.c:unx_marshal() it is clear that ->fsuid and ->fsgid must be used (rather than ->uid and ->gid). In addition, the uid and gid must be translated with from_kuid_munged() so the local client uses the correct uid and gid when acting as the local server. Jeff Layton noted: This is where the magic happens. Since we're working in kuid_t/kgid_t, we don't need to worry about further idmapping. Suggested-by: NeilBrown # to approximate unx_marshal() Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust Co-developed-by: Mike Snitzer Signed-off-by: Mike Snitzer Reviewed-by: Chuck Lever Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svcauth.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 63cf6fb26dcc..2e111153f7cd 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -157,6 +158,10 @@ extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); extern void svc_auth_unregister(rpc_authflavor_t flavor); +extern void svcauth_map_clnt_to_svc_cred_local(struct rpc_clnt *clnt, + const struct cred *, + struct svc_cred *); + extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); -- cgit v1.2.3 From 86ab08beb3f07f6e51922a8b8f662a5ec7012d35 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 5 Sep 2024 15:09:47 -0400 Subject: SUNRPC: replace program list with program array A service created with svc_create_pooled() can be given a linked list of programs and all of these will be served.
Using a linked list makes it cumbersome when there are several programs that can be optionally selected with CONFIG settings. After this patch is applied, API consumers must use only svc_create_pooled() when creating an RPC service that listens for more than one RPC program. Signed-off-by: NeilBrown Signed-off-by: Mike Snitzer Acked-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index c419a61f60e5..e68fecf6eab5 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -67,9 +67,10 @@ enum { * We currently do not support more than one RPC program per daemon. */ struct svc_serv { - struct svc_program * sv_program; /* RPC program */ + struct svc_program * sv_programs; /* RPC programs */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; + unsigned int sv_nprogs; /* Number of sv_programs */ unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -360,10 +361,9 @@ struct svc_process_info { }; /* - * List of RPC programs on the same transport endpoint + * RPC program - an array of these can use the same transport endpoint */ struct svc_program { - struct svc_program * pg_next; /* other programs (same xprt) */ u32 pg_prog; /* program number */ unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* highest version */ @@ -441,6 +441,7 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *prog, + unsigned int nprog, struct svc_stat *stats, unsigned int bufsize, int (*threadfn)(void *data)); -- cgit v1.2.3 From 2a33a85be45178198245e1f656e6224c899895e4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:48 -0400 Subject: nfs_common: add NFS LOCALIO auxiliary protocol enablement fs/nfs_common/nfslocalio.c provides interfaces that enable an NFS client to generate a nonce (single-use UUID) and associated nfs_uuid_t struct, register it with nfs_common for subsequent lookup and verification by the NFS server and if matched the NFS server populates members in the nfs_uuid_t struct. nfs_common's nfs_uuids list is the basis for localio enablement, as such it has members that point to nfsd memory for direct use by the client (e.g. 'net' is the server's network namespace, through it the client can access nn->nfsd_serv). This commit also provides the base nfs_uuid_t interfaces to allow proper net namespace refcounting for the LOCALIO use case. CONFIG_NFS_LOCALIO controls the nfs_common, NFS server and NFS client enablement for LOCALIO. If both NFS_FS=m and NFSD=m then NFS_COMMON_LOCALIO_SUPPORT=m and nfs_localio.ko is built (and provides nfs_common's LOCALIO support). 
# lsmod | grep nfs_localio nfs_localio 12288 2 nfsd,nfs sunrpc 745472 35 nfs_localio,nfsd,auth_rpcgss,lockd,nfsv3,nfs Signed-off-by: Mike Snitzer Co-developed-by: NeilBrown Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/nfslocalio.h (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h new file mode 100644 index 000000000000..4165ff8390c1 --- /dev/null +++ b/include/linux/nfslocalio.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Mike Snitzer + * Copyright (C) 2024 NeilBrown + */ +#ifndef __LINUX_NFSLOCALIO_H +#define __LINUX_NFSLOCALIO_H + +#include +#include +#include +#include +#include +#include + +/* + * Useful to allow a client to negotiate if localio + * possible with its server. + * + * See Documentation/filesystems/nfs/localio.rst for more detail. + */ +typedef struct { + uuid_t uuid; + struct list_head list; + struct net *net; /* nfsd's network namespace */ + struct auth_domain *dom; /* auth_domain for localio */ +} nfs_uuid_t; + +void nfs_uuid_begin(nfs_uuid_t *); +void nfs_uuid_end(nfs_uuid_t *); +void nfs_uuid_is_local(const uuid_t *, struct list_head *, + struct net *, struct auth_domain *, struct module *); +void nfs_uuid_invalidate_clients(struct list_head *list); +void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); + +#endif /* __LINUX_NFSLOCALIO_H */ -- cgit v1.2.3 From a61e147e6be6e763d9c6dec8061d2893c0bb3423 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:49 -0400 Subject: nfs_common: prepare for the NFS client to use nfsd_file for LOCALIO The next commit will introduce nfsd_open_local_fh() which returns an nfsd_file structure. This commit exposes LOCALIO's required NFSD symbols to the NFS client: - Make nfsd_open_local_fh() symbol and other required NFSD symbols available to NFS in a global 'nfs_to' nfsd_localio_operations struct (global access suggested by Trond, nfsd_localio_operations suggested by NeilBrown). The next commit will also introduce nfsd_localio_ops_init() that init_nfsd() will call to initialize 'nfs_to'. - Introduce nfsd_file_file() that provides access to nfsd_file's backing file. Keeps nfsd_file structure opaque to NFS client (as suggested by Jeff Layton). - Introduce nfsd_file_put_local() that will put the reference to the nfsd_file's associated nn->nfsd_serv and then put the reference to the nfsd_file (as suggested by NeilBrown). 
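Taken together, the calling pattern the client is expected to follow looks roughly like the sketch below. The operation names come from the nfsd_localio_operations table added in the hunk that follows; the wrapper, its error convention and the surrounding context are assumptions, not the actual client code:

  /* hypothetical caller: try localio, fall back to NFS over the wire on failure */
  static struct file *nfs_localio_try_open(struct net *net, struct auth_domain *dom,
                                           struct rpc_clnt *clnt, const struct cred *cred,
                                           const struct nfs_fh *fh, fmode_t mode,
                                           struct nfsd_file **nfp)
  {
          struct nfsd_file *localio;

          localio = nfs_to->nfsd_open_local_fh(net, dom, clnt, cred, fh, mode);
          if (IS_ERR_OR_NULL(localio))
                  return NULL;                      /* caller falls back to regular NFS */
          *nfp = localio;                           /* kept for nfsd_file_put_local() later */
          return nfs_to->nfsd_file_file(localio);   /* backing struct file for local I/O */
  }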
Suggested-by: Trond Myklebust # nfs_to Suggested-by: NeilBrown # nfsd_localio_operations Suggested-by: Jeff Layton # nfsd_file_file Signed-off-by: Mike Snitzer Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 4165ff8390c1..1a39d7d727bd 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -22,7 +23,7 @@ typedef struct { uuid_t uuid; struct list_head list; - struct net *net; /* nfsd's network namespace */ + struct net __rcu *net; /* nfsd's network namespace */ struct auth_domain *dom; /* auth_domain for localio */ } nfs_uuid_t; @@ -33,4 +34,28 @@ void nfs_uuid_is_local(const uuid_t *, struct list_head *, void nfs_uuid_invalidate_clients(struct list_head *list); void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); +struct nfsd_file; + +/* localio needs to map filehandle -> struct nfsd_file */ +extern struct nfsd_file * +nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, + const struct cred *, const struct nfs_fh *, + const fmode_t) __must_hold(rcu); + +struct nfsd_localio_operations { + bool (*nfsd_serv_try_get)(struct net *); + void (*nfsd_serv_put)(struct net *); + struct nfsd_file *(*nfsd_open_local_fh)(struct net *, + struct auth_domain *, + struct rpc_clnt *, + const struct cred *, + const struct nfs_fh *, + const fmode_t); + void (*nfsd_file_put_local)(struct nfsd_file *); + struct file *(*nfsd_file_file)(struct nfsd_file *); +} ____cacheline_aligned; + +extern void nfsd_localio_ops_init(void); +extern const struct nfsd_localio_operations *nfs_to; + #endif /* __LINUX_NFSLOCALIO_H */ -- cgit v1.2.3 From fa4983862e506d395acc1b8d14dbebf63acc2e82 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 5 Sep 2024 15:09:50 -0400 Subject: nfsd: add LOCALIO support Add server support for bypassing NFS for localhost reads, writes, and commits. This is only useful when both the client and server are running on the same host. If nfsd_open_local_fh() fails, the NFS client will retry, and will fall back to normal network-based read, write and commit operations if localio is no longer supported. Care is taken to ensure the same NFS security mechanisms are used (authentication, etc) regardless of whether localio or regular NFS access is used. The auth_domain established as part of the traditional NFS client access to the NFS server is also used for localio. Store the auth_domain for localio in nfs_uuid_t and transfer it to the client if it is local to the server. Relative to containers, localio gives the client access to the network namespace the server has. This is required to allow the client to access the server's per-namespace nfsd_net struct. This commit also introduces the use of NFSD's percpu_ref to interlock nfsd_destroy_serv() and nfsd_open_local_fh(), to ensure nn->nfsd_serv is not destroyed while in use by nfsd_open_local_fh() and other LOCALIO client code. CONFIG_NFS_LOCALIO enables NFS server support for LOCALIO.
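The interlock can be pictured with the guard below; this is only a sketch built from the nfsd_serv_try_get/nfsd_serv_put operations in the table above, not the actual nfsd code:

  /* sketch of the nn->nfsd_serv interlock described above (illustrative only) */
  if (!nfs_to->nfsd_serv_try_get(net))
          return NULL;                  /* nfsd is gone or shutting down: fall back to NFS */
  /* ... nn->nfsd_serv cannot be destroyed here; resolve the filehandle ... */
  nfs_to->nfsd_serv_put(net);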
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust Co-developed-by: Mike Snitzer Signed-off-by: Mike Snitzer Co-developed-by: NeilBrown Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 1a39d7d727bd..546f46b4e46f 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -6,6 +6,8 @@ #ifndef __LINUX_NFSLOCALIO_H #define __LINUX_NFSLOCALIO_H +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + #include #include #include @@ -58,4 +60,10 @@ struct nfsd_localio_operations { extern void nfsd_localio_ops_init(void); extern const struct nfsd_localio_operations *nfs_to; +#else /* CONFIG_NFS_LOCALIO */ +static inline void nfsd_localio_ops_init(void) +{ +} +#endif /* CONFIG_NFS_LOCALIO */ + #endif /* __LINUX_NFSLOCALIO_H */ -- cgit v1.2.3 From 946af9b3a0e7571c01447107d5e8968401e659ba Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:51 -0400 Subject: nfsd: implement server support for NFS_LOCALIO_PROGRAM The LOCALIO auxiliary RPC protocol consists of a single "UUID_IS_LOCAL" RPC method that allows the Linux NFS client to verify that the local Linux NFS server can see the nonce (single-use UUID) the client generated and made available in nfs_common. The server expects this protocol to use the same transport as NFS and NFSACL for its RPCs. This protocol isn't part of an IETF standard, nor does it need to be, considering it is a Linux-to-Linux auxiliary RPC protocol that amounts to an implementation detail. The UUID_IS_LOCAL method encodes the client-generated uuid_t in terms of the fixed UUID_SIZE (16 bytes). The fixed-size opaque encode and decode XDR methods are used instead of the less efficient variable-sized methods. The RPC program number for the NFS_LOCALIO_PROGRAM is 400122 (as assigned by IANA, see https://www.iana.org/assignments/rpc-program-numbers/ ): Linux Kernel Organization 400122 nfslocalio Signed-off-by: Mike Snitzer [neilb: factored out and simplified single localio protocol] Co-developed-by: NeilBrown Signed-off-by: NeilBrown Acked-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/nfs.h b/include/linux/nfs.h index ceb70a926b95..73da75908d95 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -13,6 +13,13 @@ #include #include +/* The LOCALIO program is entirely private to Linux and is + * NOT part of the uapi. + */ +#define NFS_LOCALIO_PROGRAM 400122 +#define LOCALIOPROC_NULL 0 +#define LOCALIOPROC_UUID_IS_LOCAL 1 + /* * This is the kernel NFS client file handle representation */ -- cgit v1.2.3 From df24c483e28f7f9a421afde15d0497e61bc2d3ea Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:52 -0400 Subject: nfs: pass struct nfsd_file to nfs_init_pgio and nfs_init_commit The nfsd_file will be passed, in future commits, by callers that enable LOCALIO support (for both regular NFS and pNFS IO).
[Derived from patch authored by Weston Andros Adamson, but switched from passing struct file to struct nfsd_file] Signed-off-by: Mike Snitzer Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 546f46b4e46f..2c4e0fd9da6b 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -6,6 +6,9 @@ #ifndef __LINUX_NFSLOCALIO_H #define __LINUX_NFSLOCALIO_H +/* nfsd_file structure is purposely kept opaque to NFS client */ +struct nfsd_file; + #if IS_ENABLED(CONFIG_NFS_LOCALIO) #include @@ -36,8 +39,6 @@ void nfs_uuid_is_local(const uuid_t *, struct list_head *, void nfs_uuid_invalidate_clients(struct list_head *list); void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); -struct nfsd_file; - /* localio needs to map filehandle -> struct nfsd_file */ extern struct nfsd_file * nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, -- cgit v1.2.3 From 70ba381e1a431245c137ed597ec6a05991c79bd9 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 5 Sep 2024 15:09:53 -0400 Subject: nfs: add LOCALIO support Add client support for bypassing NFS for localhost reads, writes, and commits. This is only useful when the client and the server are running on the same host. nfs_local_probe() is stubbed out, later commits will enable client and server handshake via a Linux-only LOCALIO auxiliary RPC protocol. This has dynamic binding with the nfsd module (via nfs_localio module which is part of nfs_common). LOCALIO will only work if nfsd is already loaded. The "localio_enabled" nfs kernel module parameter can be used to disable and enable the ability to use LOCALIO support. CONFIG_NFS_LOCALIO enables NFS client support for LOCALIO. Lastly, LOCALIO uses an nfsd_file to initiate all IO. To make proper use of nfsd_file (and nfsd's filecache) its lifetime (duration before nfsd_file_put is called) must extend until after commit, read and write operations. So rather than immediately drop the nfsd_file reference in nfs_local_open_fh(), that doesn't happen until nfs_local_pgio_release() for read/write and not until nfs_local_release_commit_data() for commit. The same applies to the reference held on nfsd's nn->nfsd_serv. Both objects' lifetimes and associated references are managed through calls to nfs_to->nfsd_file_put_local(). 
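To restate the lifetime rule above as code, a minimal hedged sketch (the context struct and helper names are assumptions; the real logic lives in the NFS client LOCALIO code): the nfsd_file obtained at open time travels with the I/O and is only put back, together with the implied nn->nfsd_serv reference, once the read/write or commit has completed:

    /* stashed by the submission path when LOCALIO is used */
    struct localio_io_ctx {
        struct nfsd_file *localio;
    };

    static void localio_io_release_sketch(struct localio_io_ctx *ctx)
    {
        if (!ctx->localio)
            return;

        /* one put releases the nfsd_file and the nfsd_serv reference */
        nfs_to->nfsd_file_put_local(ctx->localio);
        ctx->localio = NULL;
    }
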
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust Co-developed-by: Mike Snitzer Signed-off-by: Mike Snitzer Signed-off-by: NeilBrown # nfs_open_local_fh Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs.h | 2 ++ include/linux/nfs_fs_sb.h | 9 +++++++++ include/linux/nfslocalio.h | 4 ++++ 3 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 73da75908d95..9ad727ddfedb 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -8,6 +8,8 @@ #ifndef _LINUX_NFS_H #define _LINUX_NFS_H +#include +#include #include #include #include diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c49bfdded5c1..853df3fcd4c2 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,7 @@ struct nfs_client { #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ #define NFS_CS_PNFS 9 /* - Server used for pnfs */ +#define NFS_CS_LOCAL_IO 10 /* - client is local */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -125,6 +127,13 @@ struct nfs_client { struct net *cl_net; struct list_head pending_cb_stateids; struct rcu_head rcu; + +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + struct timespec64 cl_nfssvc_boot; + seqlock_t cl_boot_lock; + nfs_uuid_t cl_uuid; + spinlock_t cl_localio_lock; +#endif /* CONFIG_NFS_LOCALIO */ }; /* diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 2c4e0fd9da6b..b353abe00357 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -61,6 +61,10 @@ struct nfsd_localio_operations { extern void nfsd_localio_ops_init(void); extern const struct nfsd_localio_operations *nfs_to; +struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, + struct rpc_clnt *, const struct cred *, + const struct nfs_fh *, const fmode_t); + #else /* CONFIG_NFS_LOCALIO */ static inline void nfsd_localio_ops_init(void) { -- cgit v1.2.3 From 88801d043b1d16caae76a5e2e5991e8b1f55ce7f Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:35 +0800 Subject: xen/pci: Add a function to reset device for xen When device on dom0 side has been reset, the vpci on Xen side won't get notification, so that the cached state in vpci is all out of date with the real device state. To solve that problem, add a new function to clear all vpci device state when device is reset on dom0 side. And call that function in pcistub_init_device. Because when using "pci-assignable-add" to assign a passthrough device in Xen, it will reset passthrough device and the vpci state will out of date, and then device will fail to restore bar state. 
Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-2-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- include/xen/interface/physdev.h | 17 +++++++++++++++++ include/xen/pci.h | 6 ++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index a237af867873..df74e65a884b 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -256,6 +256,13 @@ struct physdev_pci_device_add { */ #define PHYSDEVOP_prepare_msix 30 #define PHYSDEVOP_release_msix 31 +/* + * Notify the hypervisor that a PCI device has been reset, so that any + * internally cached state is regenerated. Should be called after any + * device reset performed by the hardware domain. + */ +#define PHYSDEVOP_pci_device_reset 32 + struct physdev_pci_device { /* IN */ uint16_t seg; @@ -263,6 +270,16 @@ struct physdev_pci_device { uint8_t devfn; }; +struct pci_device_reset { + struct physdev_pci_device dev; +#define PCI_DEVICE_RESET_COLD 0x0 +#define PCI_DEVICE_RESET_WARM 0x1 +#define PCI_DEVICE_RESET_HOT 0x2 +#define PCI_DEVICE_RESET_FLR 0x3 +#define PCI_DEVICE_RESET_MASK 0x3 + uint32_t flags; +}; + #define PHYSDEVOP_DBGP_RESET_PREPARE 1 #define PHYSDEVOP_DBGP_RESET_DONE 2 diff --git a/include/xen/pci.h b/include/xen/pci.h index b8337cf85fd1..424b8ea89ca8 100644 --- a/include/xen/pci.h +++ b/include/xen/pci.h @@ -4,10 +4,16 @@ #define __XEN_PCI_H__ #if defined(CONFIG_XEN_DOM0) +int xen_reset_device(const struct pci_dev *dev); int xen_find_device_domain_owner(struct pci_dev *dev); int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); int xen_unregister_device_domain_owner(struct pci_dev *dev); #else +static inline int xen_reset_device(const struct pci_dev *dev) +{ + return -1; +} + static inline int xen_find_device_domain_owner(struct pci_dev *dev) { return -1; -- cgit v1.2.3 From b166b8ab4189743a717cb93f50d6fcca3a46770d Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:36 +0800 Subject: xen/pvh: Setup gsi for passthrough device In PVH dom0, the gsis don't get registered, but the gsi of a passthrough device must be configured for it to be able to be mapped into a domU. When assigning a device to passthrough, proactively setup the gsi of the device during that process. 
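The change described here reduces to the two helpers declared in the diff below; a hedged sketch of the intended call sequence at device-assignment time (the actual call site in xen-pciback may differ and handles more corner cases):

    static int pvh_setup_passthrough_gsi(struct pci_dev *dev)
    {
        int gsi, trigger, polarity, ret;

        ret = xen_acpi_get_gsi_info(dev, &gsi, &trigger, &polarity);
        if (ret)
            return ret;

        /* register the gsi with Xen so it can later be mapped into a domU */
        return xen_pvh_setup_gsi(gsi, trigger, polarity);
    }
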
Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-3-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- include/linux/acpi.h | 1 + include/xen/acpi.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0687a442fec7..02ded9f53a6b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -362,6 +362,7 @@ void acpi_unregister_gsi (u32 gsi); struct pci_dev; +struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin); int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); diff --git a/include/xen/acpi.h b/include/xen/acpi.h index b1e11863144d..3bcfe82d9078 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -67,10 +67,28 @@ static inline void xen_acpi_sleep_register(void) acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel; } } +int xen_pvh_setup_gsi(int gsi, int trigger, int polarity); +int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out); #else static inline void xen_acpi_sleep_register(void) { } + +static inline int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) +{ + return -1; +} + +static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out) +{ + return -1; +} #endif #endif /* _XEN_ACPI_H */ -- cgit v1.2.3 From 2fae6bb7be320270801b3c3b040189bd7daa8056 Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:37 +0800 Subject: xen/privcmd: Add new syscall to get gsi from dev On PVH dom0, when passthrough a device to domU, QEMU and xl tools want to use gsi number to do pirq mapping, see QEMU code xen_pt_realize->xc_physdev_map_pirq, and xl code pci_add_dm_done->xc_physdev_map_pirq, but in current codes, the gsi number is got from file /sys/bus/pci/devices//irq, that is wrong, because irq is not equal with gsi, they are in different spaces, so pirq mapping fails. And in current linux codes, there is no method to get gsi for userspace. For above purpose, record gsi of pcistub devices when init pcistub and add a new syscall into privcmd to let userspace can get gsi when they have a need. 
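From userspace the new ioctl is expected to be used roughly as follows. This is a hedged sketch: the /dev/xen/privcmd node and direct use of the installed uapi header (added in the diff below) are assumptions about the toolstack environment, not part of this patch:

    #include <fcntl.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <xen/privcmd.h> /* installed copy of include/uapi/xen/privcmd.h */

    static int pcidev_get_gsi(uint32_t sbdf)
    {
        struct privcmd_pcidev_get_gsi arg = { .sbdf = sbdf };
        int fd = open("/dev/xen/privcmd", O_RDWR);
        int ret;

        if (fd < 0)
            return -1;
        ret = ioctl(fd, IOCTL_PRIVCMD_PCIDEV_GET_GSI, &arg);
        close(fd);
        return ret < 0 ? -1 : (int)arg.gsi;
    }
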
Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-4-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- include/uapi/xen/privcmd.h | 7 +++++++ include/xen/acpi.h | 9 +++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index 8b8c5d1420fe..8e2c8fd44764 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -126,6 +126,11 @@ struct privcmd_ioeventfd { __u8 pad[2]; }; +struct privcmd_pcidev_get_gsi { + __u32 sbdf; + __u32 gsi; +}; + /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t @@ -157,5 +162,7 @@ struct privcmd_ioeventfd { _IOW('P', 8, struct privcmd_irqfd) #define IOCTL_PRIVCMD_IOEVENTFD \ _IOW('P', 9, struct privcmd_ioeventfd) +#define IOCTL_PRIVCMD_PCIDEV_GET_GSI \ + _IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_pcidev_get_gsi)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff --git a/include/xen/acpi.h b/include/xen/acpi.h index 3bcfe82d9078..daa96a22d257 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -91,4 +91,13 @@ static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, } #endif +#ifdef CONFIG_XEN_PCI_STUB +int pcistub_get_gsi_from_sbdf(unsigned int sbdf); +#else +static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf) +{ + return -1; +} +#endif + #endif /* _XEN_ACPI_H */ -- cgit v1.2.3 From 2efddb5575cd9f5f4d61ad417c92365a5f18d2f1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Sep 2024 10:57:00 +0200 Subject: Revert "driver core: shut down devices asynchronously" This reverts commit 8064952c65045f05ee2671fe437770e50c151776. The series is being reverted before -rc1 as there are still reports of lockups on shutdown, so it's not quite ready for "prime time." Reported-by: Andrey Skvortsov Link: https://lore.kernel.org/r/ZvMkkhyJrohaajuk@skv.local Cc: Christoph Hellwig Cc: David Jeffery Cc: Keith Busch Cc: Laurence Oberman Cc: Nathan Chancellor Cc: Sagi Grimberg Cc: Stuart Hayes Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 14c9211b82d6..5c04b8e3833b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -56,7 +56,6 @@ enum probe_type { * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. * @probe_type: Type of the probe (synchronous or asynchronous) to use. - * @async_shutdown_enable: Enables devices to be shutdown asynchronously. * @of_match_table: The open firmware table. * @acpi_match_table: The ACPI match table. * @probe: Called to query the existence of a specific device, @@ -103,7 +102,6 @@ struct device_driver { bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ enum probe_type probe_type; - bool async_shutdown_enable; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; -- cgit v1.2.3 From ce4db753de21abfb7516ef64aff907813e8a8e3e Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:25:52 +0800 Subject: kprobes: Remove obsoleted declaration for init_test_probes The init_test_probes() have been removed since commit e44e81c5b90f ("kprobes: convert tests to kunit"), and now it is useless, so remove it. 
Link: https://lore.kernel.org/all/20240826032552.4016314-1-cuigaosheng1@huawei.com/ Signed-off-by: Gaosheng Cui Signed-off-by: Masami Hiramatsu (Google) --- include/linux/kprobes.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 5fcbc254d186..8c4f3bb24429 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -269,15 +269,6 @@ extern unsigned long __stop_kprobe_blacklist[]; extern struct kretprobe_blackpoint kretprobe_blacklist[]; -#ifdef CONFIG_KPROBES_SANITY_TEST -extern int init_test_probes(void); -#else /* !CONFIG_KPROBES_SANITY_TEST */ -static inline int init_test_probes(void) -{ - return 0; -} -#endif /* CONFIG_KPROBES_SANITY_TEST */ - extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); -- cgit v1.2.3 From 08377ed24feef66866d0c6aabcae0aec515cf2ca Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:26 -0400 Subject: xen: sync elfnote.h from xen tree Sync Xen's elfnote.h header from xen.git to pull in the XEN_ELFNOTE_PHYS32_RELOC define. xen commit dfc9fab00378 ("x86/PVH: Support relocatable dom0 kernels") This is a copy except for the removal of the emacs editor config at the end of the file. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-2-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- include/xen/interface/elfnote.h | 93 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index 38deb1214613..918f47d87d7a 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -11,7 +11,9 @@ #define __XEN_PUBLIC_ELFNOTE_H__ /* - * The notes should live in a SHT_NOTE segment and have "Xen" in the + * `incontents 200 elfnotes ELF notes + * + * The notes should live in a PT_NOTE segment and have "Xen" in the * name field. * * Numeric types are either 4 or 8 bytes depending on the content of @@ -22,6 +24,8 @@ * * String values (for non-legacy) are NULL terminated ASCII, also known * as ASCIZ type. + * + * Xen only uses ELF Notes contained in x86 binaries. */ /* @@ -52,7 +56,7 @@ #define XEN_ELFNOTE_VIRT_BASE 3 /* - * The offset of the ELF paddr field from the acutal required + * The offset of the ELF paddr field from the actual required * pseudo-physical address (numeric). * * This is used to maintain backwards compatibility with older kernels @@ -92,7 +96,12 @@ #define XEN_ELFNOTE_LOADER 8 /* - * The kernel supports PAE (x86/32 only, string = "yes" or "no"). + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. * * LEGACY: PAE (n.b. The legacy interface included a provision to * indicate 'extended-cr3' support allowing L3 page tables to be @@ -149,7 +158,9 @@ * The (non-default) location the initial phys-to-machine map should be * placed at by the hypervisor (Dom0) or the tools (DomU). * The kernel must be prepared for this mapping to be established using - * large pages, despite such otherwise not being available to guests. + * large pages, despite such otherwise not being available to guests. 
Note + * that these large pages may be misaligned in PFN space (they'll obviously + * be aligned in MFN and virtual address spaces). * The kernel must also be able to handle the page table pages used for * this mapping not being accessible through the initial mapping. * (Only x86-64 supports this at present.) @@ -185,9 +196,81 @@ */ #define XEN_ELFNOTE_PHYS32_ENTRY 18 +/* + * Physical loading constraints for PVH kernels + * + * The presence of this note indicates the kernel supports relocating itself. + * + * The note may include up to three 32bit values to place constraints on the + * guest physical loading addresses and alignment for a PVH kernel. Values + * are read in the following order: + * - a required start alignment (default 0x200000) + * - a minimum address for the start of the image (default 0; see below) + * - a maximum address for the last byte of the image (default 0xffffffff) + * + * When this note specifies an alignment value, it is used. Otherwise the + * maximum p_align value from loadable ELF Program Headers is used, if it is + * greater than or equal to 4k (0x1000). Otherwise, the default is used. + */ +#define XEN_ELFNOTE_PHYS32_RELOC 19 + /* * The number of the highest elfnote defined. */ -#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_RELOC + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ -- cgit v1.2.3 From d5dbf8b48a4620db771f399ed7fce32d447f04a6 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 18 Aug 2024 19:42:58 +0900 Subject: tracepoint: Support iterating over tracepoints on modules Add for_each_module_tracepoint() for iterating over tracepoints on modules. 
This is similar to the for_each_kernel_tracepoint() but only for the tracepoints on modules (not including kernel built-in tracepoints). Link: https://lore.kernel.org/all/172397777800.286558.14554748203446214056.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- include/linux/tracepoint.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 6be396bb4297..837fcf8ec0d5 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -64,6 +64,8 @@ struct tp_module { bool trace_module_has_bad_taint(struct module *mod); extern int register_tracepoint_module_notifier(struct notifier_block *nb); extern int unregister_tracepoint_module_notifier(struct notifier_block *nb); +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *), + void *priv); #else static inline bool trace_module_has_bad_taint(struct module *mod) { @@ -79,6 +81,11 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb) { return 0; } +static inline +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *), + void *priv) +{ +} #endif /* CONFIG_MODULES */ /* -- cgit v1.2.3 From d4df54f338e43c790460674a3cc7db35b8395421 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 18 Aug 2024 19:43:07 +0900 Subject: tracepoint: Support iterating tracepoints in a loading module Add for_each_tracepoint_in_module() function to iterate tracepoints in a module. This API is needed for handling tracepoints in a loading module from tracepoint_module_notifier callback function. This also update for_each_module_tracepoint() to pass the module to callback function so that it can find module easily. Link: https://lore.kernel.org/all/172397778740.286558.15781131277732977643.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- include/linux/tracepoint.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 837fcf8ec0d5..93a9f3070b48 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -64,8 +64,13 @@ struct tp_module { bool trace_module_has_bad_taint(struct module *mod); extern int register_tracepoint_module_notifier(struct notifier_block *nb); extern int unregister_tracepoint_module_notifier(struct notifier_block *nb); -void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *), +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, + struct module *, void *), void *priv); +void for_each_tracepoint_in_module(struct module *, + void (*fct)(struct tracepoint *, + struct module *, void *), + void *priv); #else static inline bool trace_module_has_bad_taint(struct module *mod) { @@ -82,10 +87,18 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb) return 0; } static inline -void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *), +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, + struct module *, void *), void *priv) { } +static inline +void for_each_tracepoint_in_module(struct module *mod, + void (*fct)(struct tracepoint *, + struct module *, void *), + void *priv) +{ +} #endif /* CONFIG_MODULES */ /* -- cgit v1.2.3 From 04e906839a053f092ef53f4fb2d610983412b904 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 19 Sep 2024 14:33:42 +0200 Subject: usbnet: fix cyclical race on disconnect with work queue The work can submit URBs and the URBs can schedule the work. 
This cycle needs to be broken, when a device is to be stopped. Use a flag to do so. This is a design issue as old as the driver. Signed-off-by: Oliver Neukum Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") CC: stable@vger.kernel.org Link: https://patch.msgid.link/20240919123525.688065-1-oneukum@suse.com Signed-off-by: Paolo Abeni --- include/linux/usb/usbnet.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 9f08a584d707..0b9f1e598e3a 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -76,8 +76,23 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 +/* This one is special, as it indicates that the device is going away + * there are cyclic dependencies between tasklet, timer and bh + * that must be broken + */ +# define EVENT_UNPLUG 31 }; +static inline bool usbnet_going_away(struct usbnet *ubn) +{ + return test_bit(EVENT_UNPLUG, &ubn->flags); +} + +static inline void usbnet_mark_going_away(struct usbnet *ubn) +{ + set_bit(EVENT_UNPLUG, &ubn->flags); +} + static inline struct usb_driver *driver_of(struct usb_interface *intf) { return to_usb_driver(intf->dev.driver); -- cgit v1.2.3 From 8af79d3edb5fd2dce35ea0a71595b6d4f9962350 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Sep 2024 15:13:39 +0200 Subject: netfilter: nfnetlink_queue: remove old clash resolution logic For historical reasons there are two clash resolution spots in netfilter, one in nfnetlink_queue and one in conntrack core. nfnetlink_queue one was added first: If a colliding entry is found, NAT NAT transformation is reversed by calling nat engine again with altered tuple. See commit 368982cd7d1b ("netfilter: nfnetlink_queue: resolve clash for unconfirmed conntracks") for details. One problem is that nf_reroute() won't take an action if the queueing doesn't occur in the OUTPUT hook, i.e. when queueing in forward or postrouting, packet will be sent via the wrong path. Another problem is that the scenario addressed (2nd UDP packet sent with identical addresses while first packet is still being processed) can also occur without any nfqueue involvement due to threaded resolvers doing A and AAAA requests back-to-back. This lead us to add clash resolution logic to the conntrack core, see commit 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries"). Instead of fixing the nfqueue based logic, lets remove it and let conntrack core handle this instead. Retain the ->update hook for sake of nfqueue based conntrack helpers. We could axe this hook completely but we'd have to split confirm and helper logic again, see commit ee04805ff54a ("netfilter: conntrack: make conntrack userspace helpers work again"). This SHOULD NOT be backported to kernels earlier than v5.6; they lack adequate clash resolution handling. Patch was originally written by Pablo Neira Ayuso. 
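Returning to the usbnet fix above: the new flag only helps if every path that can resubmit URBs checks it, as in this hedged illustration (the real usbnet_deferred_kevent() and usbnet_bh() are far larger; only the check itself is the point here):

    static void usbnet_kevent_sketch(struct work_struct *work)
    {
        struct usbnet *dev = container_of(work, struct usbnet, kevent);

        /* device is unplugging: break the work <-> URB resubmission cycle */
        if (usbnet_going_away(dev))
            return;

        /* ... otherwise process EVENT_* bits, possibly submitting new URBs ... */
    }
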
Reported-by: Antonio Ojea Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1766 Signed-off-by: Florian Westphal Tested-by: Antonio Ojea Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2683b2b77612..2b8aac2c70ad 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -376,15 +376,11 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, struct nf_conn; enum nf_nat_manip_type; struct nlattr; -enum ip_conntrack_dir; struct nf_nat_hook { int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl); - unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct, - enum nf_nat_manip_type mtype, - enum ip_conntrack_dir dir); void (*remove_nat_bysrc)(struct nf_conn *ct); }; -- cgit v1.2.3 From bfc4a245a794841cba5cf287034a0f60d3087402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Sep 2024 08:35:24 +0200 Subject: dma-mapping: fix DMA API tracing for chained scatterlists scatterlist allocations can be chained, and thus all iterations need to use the chain-aware iterators. Switch the newly added tracing to use the proper iterators so that they work with chained scatterlists. Fixes: 038eb433dc14 ("dma-mapping: add tracing for dma-mapping API calls") Reported-by: syzbot+95e4ef83a3024384ec7a@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Reviewed-by: Sean Anderson Tested-by: syzbot+95e4ef83a3024384ec7a@syzkaller.appspotmail.com --- include/trace/events/dma.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index f57f05331d73..569f86a44aaa 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -176,9 +176,9 @@ TRACE_EVENT(dma_free, ); TRACE_EVENT(dma_map_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, int ents, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, sg, nents, ents, dir, attrs), + TP_ARGS(dev, sgl, nents, ents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -190,17 +190,17 @@ TRACE_EVENT(dma_map_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) - ((u64 *)__get_dynamic_array(phys_addrs))[i] = - sg_phys(sg + i); - for (i = 0; i < ents; i++) { + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + for_each_sg(sgl, sg, ents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = - sg_dma_address(sg + i); + sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = - sg_dma_len(sg + i); + sg_dma_len(sg); } __entry->dir = dir; __entry->attrs = attrs; @@ -222,9 +222,9 @@ TRACE_EVENT(dma_map_sg, ); TRACE_EVENT(dma_unmap_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, sg, nents, dir, attrs), + TP_ARGS(dev, sgl, nents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -234,12 +234,12 @@ TRACE_EVENT(dma_unmap_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) - ((u64 
*)__get_dynamic_array(addrs))[i] = - sg_phys(sg + i); + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(addrs))[i] = sg_phys(sg); __entry->dir = dir; __entry->attrs = attrs; ), @@ -290,9 +290,9 @@ DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, TP_ARGS(dev, dma_addr, size, dir)); DECLARE_EVENT_CLASS(dma_sync_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir), - TP_ARGS(dev, sg, nents, dir), + TP_ARGS(dev, sgl, nents, dir), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -302,14 +302,15 @@ DECLARE_EVENT_CLASS(dma_sync_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) { + for_each_sg(sgl, sg, nents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = - sg_dma_address(sg + i); + sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = - sg_dma_len(sg + i); + sg_dma_len(sg); } __entry->dir = dir; ), -- cgit v1.2.3 From a78282e2c94f4ca80a2d7c56e4d1e9546be5596d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 26 Sep 2024 11:39:02 -0700 Subject: Revert "binfmt_elf, coredump: Log the reason of the failed core dumps" This reverts commit fb97d2eb542faf19a8725afbd75cbc2518903210. The logging was questionable to begin with, but it seems to actively deadlock on the task lock. "On second thought, let's not log core dump failures. 'Tis a silly place" because if you can't tell your core dump is truncated, maybe you should just fix your debugger instead of adding bugs to the kernel. Reported-by: Vegard Nossum Link: https://lore.kernel.org/all/d122ece6-3606-49de-ae4d-8da88846bef2@oracle.com/ Signed-off-by: Linus Torvalds --- include/linux/coredump.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index edeb8532ce0f..45e598fe3476 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -42,7 +42,7 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -extern int do_coredump(const kernel_siginfo_t *siginfo); +extern void do_coredump(const kernel_siginfo_t *siginfo); /* * Logging for the coredump code, ratelimited. @@ -62,11 +62,7 @@ extern int do_coredump(const kernel_siginfo_t *siginfo); #define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) #else -static inline int do_coredump(const kernel_siginfo_t *siginfo) -{ - /* Coredump support is not available, can't fail. */ - return 0; -} +static inline void do_coredump(const kernel_siginfo_t *siginfo) {} #define coredump_report(...) #define coredump_report_failure(...) -- cgit v1.2.3 From 26a8ea80929c518bdec5e53a5776f95919b7c88e Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Tue, 3 Sep 2024 07:25:19 -0700 Subject: mm/hugetlb: fix memfd_pin_folios resv_huge_pages leak memfd_pin_folios followed by unpin_folios leaves resv_huge_pages elevated if the pages were not already faulted in. 
During a normal page fault, resv_huge_pages is consumed here: hugetlb_fault() alloc_hugetlb_folio() dequeue_hugetlb_folio_vma() dequeue_hugetlb_folio_nodemask() dequeue_hugetlb_folio_node_exact() free_huge_pages-- resv_huge_pages-- During memfd_pin_folios, the page is created by calling alloc_hugetlb_folio_nodemask instead of alloc_hugetlb_folio, and resv_huge_pages is not modified: memfd_alloc_folio() alloc_hugetlb_folio_nodemask() dequeue_hugetlb_folio_nodemask() dequeue_hugetlb_folio_node_exact() free_huge_pages-- alloc_hugetlb_folio_nodemask has other callers that must not modify resv_huge_pages. Therefore, to fix, define an alternate version of alloc_hugetlb_folio_nodemask for this call site that adjusts resv_huge_pages. Link: https://lkml.kernel.org/r/1725373521-451395-4-git-send-email-steven.sistare@oracle.com Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios") Signed-off-by: Steve Sistare Acked-by: Vivek Kasireddy Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 98c47c394b89..e4697539b665 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -692,6 +692,9 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback); +struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask); + int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, @@ -1059,6 +1062,13 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, return NULL; } +static inline struct folio * +alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask) +{ + return NULL; +} + static inline struct folio * alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, -- cgit v1.2.3 From c5b1184decc819756ae549ba54c63b6790c4ddfd Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 24 Sep 2024 14:27:10 +0800 Subject: compiler.h: specify correct attribute for .rodata..c_jump_table Currently, there is an assembler message when generating kernel/bpf/core.o under CONFIG_OBJTOOL with LoongArch compiler toolchain: Warning: setting incorrect section attributes for .rodata..c_jump_table This is because the section ".rodata..c_jump_table" should be readonly, but there is a "W" (writable) part of the flags: $ readelf -S kernel/bpf/core.o | grep -A 1 "rodata..c" [34] .rodata..c_j[...] PROGBITS 0000000000000000 0000d2e0 0000000000000800 0000000000000000 WA 0 0 8 There is no above issue on x86 due to the generated section flag is only "A" (allocatable). In order to silence the warning on LoongArch, specify the attribute like ".rodata..c_jump_table,\"a\",@progbits #" explicitly, then the section attribute of ".rodata..c_jump_table" must be readonly in the kernel/bpf/core.o file. 
Before: $ objdump -h kernel/bpf/core.o | grep -A 1 "rodata..c" 21 .rodata..c_jump_table 00000800 0000000000000000 0000000000000000 0000d2e0 2**3 CONTENTS, ALLOC, LOAD, RELOC, DATA After: $ objdump -h kernel/bpf/core.o | grep -A 1 "rodata..c" 21 .rodata..c_jump_table 00000800 0000000000000000 0000000000000000 0000d2e0 2**3 CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA By the way, AFAICT, maybe the root cause is related with the different compiler behavior of various archs, so to some extent this change is a workaround for LoongArch, and also there is no effect for x86 which is the only port supported by objtool before LoongArch with this patch. Link: https://lkml.kernel.org/r/20240924062710.1243-1-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: [6.9+] Signed-off-by: Andrew Morton --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ec55bcce4146..4d4e23b6e3e7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -133,7 +133,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define annotate_unreachable() __annotate_unreachable(__COUNTER__) /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(".rodata..c_jump_table") +#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #") #else /* !CONFIG_OBJTOOL */ #define annotate_reachable() -- cgit v1.2.3 From cb787f4ac0c2e439ea8d7e6387b925f74576bdf8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 Sep 2024 02:56:11 +0100 Subject: [tree-wide] finally take no_llseek out no_llseek had been defined to NULL two years ago, in commit 868941b14441 ("fs: remove no_llseek") To quote that commit, At -rc1 we'll need do a mechanical removal of no_llseek - git grep -l -w no_llseek | grep -v porting.rst | while read i; do sed -i '/\/d' $i done would do it. Unfortunately, that hadn't been done. Linus, could you do that now, so that we could finally put that thing to rest? All instances are of the form .llseek = no_llseek, so it's obviously safe. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/debugfs.h | 1 - include/linux/fs.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index c9c65b132c0f..0928a6c8ae1e 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -57,7 +57,6 @@ static const struct file_operations __fops = { \ .release = simple_attr_release, \ .read = debugfs_attr_read, \ .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \ - .llseek = no_llseek, \ } #define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index eae5b67e4a15..e3c603d01337 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3234,7 +3234,6 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *, extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); -#define no_llseek NULL extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, -- cgit v1.2.3
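As a closing illustration of what the mechanical no_llseek removal means for drivers (hedged; example_read is a placeholder, not a real helper): seekable files keep an explicit .llseek such as generic_file_llseek or noop_llseek, while non-seekable files now simply leave the field unset, since a NULL ->llseek has meant "not seekable" ever since no_llseek was defined to NULL:

    static ssize_t example_read(struct file *file, char __user *buf,
                                size_t count, loff_t *ppos);

    static const struct file_operations example_fops = {
        .owner = THIS_MODULE,
        .open  = simple_open,
        .read  = example_read,
        /* no .llseek member at all: lseek() on this file fails */
    };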