From e2b43fb25243d502ad36b07bab9de09f4b76fff9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 1 Dec 2025 17:50:48 -0800
Subject: KVM: x86: Apply runtime updates to current CPUID during
 KVM_SET_CPUID{,2}

When handling KVM_SET_CPUID{,2}, do runtime CPUID updates on the vCPU's
current CPUID (and caps) prior to swapping in the incoming CPUID state so
that KVM doesn't lose pending updates if the incoming CPUID is rejected,
and to prevent a false failure on the equality check.

Note, runtime updates are unconditionally performed on the incoming/new
CPUID (and associated caps), i.e. clearing the dirty flag won't negatively
affect the new CPUID.

Fixes: 93da6af3ae56 ("KVM: x86: Defer runtime updates of dynamic CPUID bits until CPUID emulation")
Reported-by: Igor Mammedov <imammedo@redhat.com>
Closes: https://lore.kernel.org/all/20251128123202.68424a95@imammedo
Cc: stable@vger.kernel.org
Acked-by: Igor Mammedov <imammedo@redhat.com>
Tested-by: Igor Mammedov <imammedo@redhat.com>
Link: https://patch.msgid.link/20251202015049.1167490-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/cpuid.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 52524e0ca97f..913ffb995279 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -509,11 +509,18 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 	u32 vcpu_caps[NR_KVM_CPU_CAPS];
 	int r;
 
+	/*
+	 * Apply pending runtime CPUID updates to the current CPUID entries to
+	 * avoid false positives due to mismatches on KVM-owned feature flags.
+	 */
+	if (vcpu->arch.cpuid_dynamic_bits_dirty)
+		kvm_update_cpuid_runtime(vcpu);
+
 	/*
 	 * Swap the existing (old) entries with the incoming (new) entries in
 	 * order to massage the new entries, e.g. to account for dynamic bits
-	 * that KVM controls, without clobbering the current guest CPUID, which
-	 * KVM needs to preserve in order to unwind on failure.
+	 * that KVM controls, without losing the current guest CPUID, which KVM
+	 * needs to preserve in order to unwind on failure.
 	 *
 	 * Similarly, save the vCPU's current cpu_caps so that the capabilities
 	 * can be updated alongside the CPUID entries when performing runtime
-- 
cgit v1.2.3


From da01f64e7470988f8607776aa7afa924208863fb Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 13 Nov 2025 14:56:13 -0800
Subject: KVM: nSVM: Clear exit_code_hi in VMCB when synthesizing nested
 VM-Exits

Explicitly clear exit_code_hi in the VMCB when synthesizing "normal"
nested VM-Exits, as the full exit code is a 64-bit value (spoiler alert),
and all exit codes for non-failing VMRUN use only bits 31:0.

Cc: Jim Mattson <jmattson@google.com>
Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
Cc: stable@vger.kernel.org
Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251113225621.1688428-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/svm.c | 2 ++
 arch/x86/kvm/svm/svm.h | 7 ++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9d29b2e7e855..eeeb4ae4c617 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2435,6 +2435,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
 
 	if (cr0 ^ val) {
 		svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+		svm->vmcb->control.exit_code_hi = 0;
 		ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
 	}
 
@@ -4611,6 +4612,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	if (static_cpu_has(X86_FEATURE_NRIPS))
 		vmcb->control.next_rip  = info->next_rip;
 	vmcb->control.exit_code = icpt_info.exit_code;
+	vmcb->control.exit_code_hi = 0;
 	vmexit = nested_svm_exit_handled(svm);
 
 	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index dd78e6402345..e66a16e59b1a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -764,9 +764,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm);
 
 static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
 {
-	svm->vmcb->control.exit_code   = exit_code;
-	svm->vmcb->control.exit_info_1 = 0;
-	svm->vmcb->control.exit_info_2 = 0;
+	svm->vmcb->control.exit_code	= exit_code;
+	svm->vmcb->control.exit_code_hi	= 0;
+	svm->vmcb->control.exit_info_1	= 0;
+	svm->vmcb->control.exit_info_2	= 0;
 	return nested_svm_vmexit(svm);
 }
 
-- 
cgit v1.2.3


From f402ecd7a8b6446547076f4bd24bd5d4dcc94481 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 13 Nov 2025 14:56:14 -0800
Subject: KVM: nSVM: Set exit_code_hi to -1 when synthesizing SVM_EXIT_ERR
 (failed VMRUN)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Set exit_code_hi to -1u as a temporary band-aid to fix a long-standing
(effectively since KVM's inception) bug where KVM treats the exit code as
a 32-bit value, when in reality it's a 64-bit value.  Per the APM, offset
0x70 is a single 64-bit value:

  070h 63:0 EXITCODE

And a sane reading of the error values defined in "Table C-1. SVM Intercept
Codes" is that negative values use the full 64 bits:

  –1 VMEXIT_INVALID Invalid guest state in VMCB.
  –2 VMEXIT_BUSY BUSY bit was set in the VMSA
  –3 VMEXIT_IDLE_REQUIRED The sibling thread is not in an idle state
  –4 VMEXIT_INVALID_PMC Invalid PMC state

And that interpretation is confirmed by testing on Milan and Turin (by
setting bits in CR0[63:32] to generate VMEXIT_INVALID on VMRUN).

Furthermore, Xen has treated exitcode as a 64-bit value since HVM support
was added in 2006 (see Xen commit d1bd157fbc ("Big merge the HVM
full-virtualisation abstractions.")).

Cc: Jim Mattson <jmattson@google.com>
Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
Cc: stable@vger.kernel.org
Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251113225621.1688428-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/nested.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index da6e80b3ac35..143a0ef02b03 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -983,7 +983,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	if (!nested_vmcb_check_save(vcpu) ||
 	    !nested_vmcb_check_controls(vcpu)) {
 		vmcb12->control.exit_code    = SVM_EXIT_ERR;
-		vmcb12->control.exit_code_hi = 0;
+		vmcb12->control.exit_code_hi = -1u;
 		vmcb12->control.exit_info_1  = 0;
 		vmcb12->control.exit_info_2  = 0;
 		goto out;
@@ -1016,7 +1016,7 @@ out_exit_err:
 	svm->soft_int_injected = false;
 
 	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
-	svm->vmcb->control.exit_code_hi = 0;
+	svm->vmcb->control.exit_code_hi = -1u;
 	svm->vmcb->control.exit_info_1  = 0;
 	svm->vmcb->control.exit_info_2  = 0;
 
-- 
cgit v1.2.3


From b2849bec936be642b5420801f902337f2507648e Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang@oracle.com>
Date: Fri, 5 Dec 2025 15:19:04 -0800
Subject: KVM: VMX: Update SVI during runtime APICv activation

The APICv (apic->apicv_active) can be activated or deactivated at runtime,
for instance, because of APICv inhibit reasons. Intel VMX employs different
mechanisms to virtualize LAPIC based on whether APICv is active.

When APICv is activated at runtime, GUEST_INTR_STATUS is used to configure
and report the current pending IRR and ISR states. Unless a specific vector
is explicitly included in EOI_EXIT_BITMAP, its EOI will not be trapped to
KVM. Intel VMX automatically clears the corresponding ISR bit based on the
GUEST_INTR_STATUS.SVI field.

When APICv is deactivated at runtime, the VM_ENTRY_INTR_INFO_FIELD is used
to specify the next interrupt vector to invoke upon VM-entry. The
VMX IDT_VECTORING_INFO_FIELD is used to report un-invoked vectors on
VM-exit. EOIs are always trapped to KVM, so the software can manually clear
pending ISR bits.

There are scenarios where, with APICv activated at runtime, a guest-issued
EOI may not be able to clear the pending ISR bit.

Taking vector 236 as an example, here is one scenario.

1. Suppose APICv is inactive. Vector 236 is pending in the IRR.
2. To handle KVM_REQ_EVENT, KVM moves vector 236 from the IRR to the ISR,
and configures the VM_ENTRY_INTR_INFO_FIELD via vmx_inject_irq().
3. After VM-entry, vector 236 is invoked through the guest IDT. At this
point, the data in VM_ENTRY_INTR_INFO_FIELD is no longer valid. The guest
interrupt handler for vector 236 is invoked.
4. Suppose a VM exit occurs very early in the guest interrupt handler,
before the EOI is issued.
5. Nothing is reported through the IDT_VECTORING_INFO_FIELD because
vector 236 has already been invoked in the guest.
6. Now, suppose APICv is activated. Before the next VM-entry, KVM calls
kvm_vcpu_update_apicv() to activate APICv.
7. Unfortunately, GUEST_INTR_STATUS.SVI is not configured, although
vector 236 is still pending in the ISR.
8. After VM-entry, the guest finally issues the EOI for vector 236.
However, because SVI is not configured, vector 236 is not cleared.
9. ISR is stalled forever on vector 236.

Here is another scenario.

1. Suppose APICv is inactive. Vector 236 is pending in the IRR.
2. To handle KVM_REQ_EVENT, KVM moves vector 236 from the IRR to the ISR,
and configures the VM_ENTRY_INTR_INFO_FIELD via vmx_inject_irq().
3. VM-exit occurs immediately after the next VM-entry. The vector 236 is
not invoked through the guest IDT. Instead, it is saved to the
IDT_VECTORING_INFO_FIELD during the VM-exit.
4. KVM calls kvm_queue_interrupt() to re-queue the un-invoked vector 236
into vcpu->arch.interrupt. A KVM_REQ_EVENT is requested.
5. Now, suppose APICv is activated. Before the next VM-entry, KVM calls
kvm_vcpu_update_apicv() to activate APICv.
6. Although APICv is now active, KVM still uses the legacy
VM_ENTRY_INTR_INFO_FIELD to re-inject vector 236. GUEST_INTR_STATUS.SVI is
not configured.
7. After the next VM-entry, vector 236 is invoked through the guest IDT.
Finally, an EOI occurs. However, due to the lack of GUEST_INTR_STATUS.SVI
configuration, vector 236 is not cleared from the ISR.
8. ISR is stalled forever on vector 236.

Using QEMU as an example, vector 236 is stuck in ISR forever.

(qemu) info lapic 1
dumping local APIC state for CPU 1

LVT0	 0x00010700 active-hi edge  masked                      ExtINT (vec 0)
LVT1	 0x00010400 active-hi edge  masked                      NMI
LVTPC	 0x00000400 active-hi edge                              NMI
LVTERR	 0x000000fe active-hi edge                              Fixed  (vec 254)
LVTTHMR	 0x00010000 active-hi edge  masked                      Fixed  (vec 0)
LVTT	 0x000400ec active-hi edge                 tsc-deadline Fixed  (vec 236)
Timer	 DCR=0x0 (divide by 2) initial_count = 0 current_count = 0
SPIV	 0x000001ff APIC enabled, focus=off, spurious vec 255
ICR	 0x000000fd physical edge de-assert no-shorthand
ICR2	 0x00000000 cpu 0 (X2APIC ID)
ESR	 0x00000000
ISR	 236
IRR	 37(level) 236

The issue isn't applicable to AMD SVM as KVM simply writes vmcb01 directly
irrespective of whether L1 (vmcb01) or L2 (vmcb02) is active (unlike VMX,
there is no need/cost to switch between VMCBs).  In addition,
APICV_INHIBIT_REASON_IRQWIN ensures AMD SVM AVIC is not activated until
the last interrupt is EOI'd.

Fix the bug by configuring Intel VMX GUEST_INTR_STATUS.SVI if APICv is
activated at runtime.

Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Link: https://patch.msgid.link/20251110063212.34902-1-dongli.zhang@oracle.com
[sean: call out that SVM writes vmcb01 directly, tweak comment]
Link: https://patch.msgid.link/20251205231913.441872-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/vmx.c | 9 ---------
 arch/x86/kvm/x86.c     | 7 +++++++
 2 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 91b6f2f3edc2..c3b9eb72b6f3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6886,15 +6886,6 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
 	 * VM-Exit, otherwise L1 with run with a stale SVI.
 	 */
 	if (is_guest_mode(vcpu)) {
-		/*
-		 * KVM is supposed to forward intercepted L2 EOIs to L1 if VID
-		 * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
-		 * Note, userspace can stuff state while L2 is active; assert
-		 * that VID is disabled if and only if the vCPU is in KVM_RUN
-		 * to avoid false positives if userspace is setting APIC state.
-		 */
-		WARN_ON_ONCE(vcpu->wants_to_run &&
-			     nested_cpu_has_vid(get_vmcs12(vcpu)));
 		to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
 		return;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c9c2aa6f4705..82036205945f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10877,9 +10877,16 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 	 * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
 	 * still active when the interrupt got accepted. Make sure
 	 * kvm_check_and_inject_events() is called to check for that.
+	 *
+	 * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
+	 * highest bit in vISR and the next accelerated EOI in the guest won't
+	 * be virtualized correctly (the CPU uses SVI to determine which vISR
+	 * vector to clear).
 	 */
 	if (!apic->apicv_active)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
+	else
+		kvm_apic_update_hwapic_isr(vcpu);
 
 out:
 	preempt_enable();
-- 
cgit v1.2.3


From 29763138830916f46daaa50e83e7f4f907a3236b Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang@oracle.com>
Date: Fri, 5 Dec 2025 15:19:05 -0800
Subject: KVM: nVMX: Immediately refresh APICv controls as needed on nested
 VM-Exit

If an APICv status update was pended while L2 was active, immediately
refresh vmcs01's controls instead of pending KVM_REQ_APICV_UPDATE as
kvm_vcpu_update_apicv() only calls into vendor code if a change is
necessary.

E.g. if APICv is inhibited, and then activated while L2 is running:

  kvm_vcpu_update_apicv()
  |
  -> __kvm_vcpu_update_apicv()
     |
     -> apic->apicv_active = true
      |
      -> vmx_refresh_apicv_exec_ctrl()
         |
         -> vmx->nested.update_vmcs01_apicv_status = true
          |
          -> return

Then L2 exits to L1:

  __nested_vmx_vmexit()
  |
  -> kvm_make_request(KVM_REQ_APICV_UPDATE)

  vcpu_enter_guest(): KVM_REQ_APICV_UPDATE
  -> kvm_vcpu_update_apicv()
     |
     -> __kvm_vcpu_update_apicv()
        |
        -> return // because if (apic->apicv_active == activate)

Reported-by: Chao Gao <chao.gao@intel.com>
Closes: https://lore.kernel.org/all/aQ2jmnN8wUYVEawF@intel.com
Fixes: 7c69661e225c ("KVM: nVMX: Defer APICv updates while L2 is active until L1 is active")
Cc: stable@vger.kernel.org
Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
[sean: write changelog]
Link: https://patch.msgid.link/20251205231913.441872-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/nested.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bcea087b642f..1725c6a94f99 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,6 +19,7 @@
 #include "trace.h"
 #include "vmx.h"
 #include "smm.h"
+#include "x86_ops.h"
 
 static bool __read_mostly enable_shadow_vmcs = 1;
 module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5216,7 +5217,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 
 	if (vmx->nested.update_vmcs01_apicv_status) {
 		vmx->nested.update_vmcs01_apicv_status = false;
-		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+		vmx_refresh_apicv_exec_ctrl(vcpu);
 	}
 
 	if (vmx->nested.update_vmcs01_hwapic_isr) {
-- 
cgit v1.2.3


From 189e5deb944a6f9c7992355d60bffd8ec2e54a9c Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Thu, 4 Dec 2025 13:59:16 +0100
Subject: bpf, arm64: Do not audit capability check in do_jit()

Analogously to the x86 commit 881a9c9cb785 ("bpf: Do not audit
capability check in do_jit()"), change the capable() call to
ns_capable_noaudit() in order to avoid spurious SELinux denials in the
audit log.

The commit log from that commit applies here as well:
"""
The failure of this check only results in a security mitigation being
applied, slightly affecting performance of the compiled BPF program. It
doesn't result in a failed syscall, and thus auditing a failed LSM
permission check for it is unwanted. For example with SELinux, it causes
a denial to be reported for confined processes running as root, which
tends to be flagged as a problem to be fixed in the policy. Yet
dontauditing or allowing CAP_SYS_ADMIN to the domain may not be
desirable, as it would allow/silence also other checks - either going
against the principle of least privilege or making debugging potentially
harder.

Fix it by changing it from capable() to ns_capable_noaudit(), which
instructs the LSMs to not audit the resulting denials.
"""

Fixes: f300769ead03 ("arm64: bpf: Only mitigate cBPF programs loaded by unprivileged users")
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Link: https://lore.kernel.org/r/20251204125916.441021-1-omosnace@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/arm64/net/bpf_jit_comp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 74dd29816f36..b6eb7a465ad2 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1004,7 +1004,7 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
 	    arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
 		return;
 
-	if (capable(CAP_SYS_ADMIN))
+	if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
 		return;
 
 	if (supports_clearbhb(SCOPE_SYSTEM)) {
-- 
cgit v1.2.3


From ca45c84afb8c91a8d688b0012657099c24f59266 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@kernel.org>
Date: Wed, 3 Dec 2025 19:32:15 -0800
Subject: bpf: Add bpf_has_frame_pointer()

Introduce a bpf_has_frame_pointer() helper that unwinders can call to
determine whether a given instruction pointer is within the valid frame
pointer region of a BPF JIT program or trampoline (i.e., after the
prologue, before the epilogue).

This will enable livepatch (with the ORC unwinder) to reliably unwind
through BPF JIT frames.

Acked-by: Song Liu <song@kernel.org>
Acked-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Link: https://lore.kernel.org/r/fd2bc5b4e261a680774b28f6100509fd5ebad2f0.1764818927.git.jpoimboe@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b69dc7194e2c..b0bac2a66eff 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	emit_prologue(&prog, image, stack_depth,
 		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
 		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+
+	bpf_prog->aux->ksym.fp_start = prog - temp;
+
 	/* Exception callback will clobber callee regs for its own use, and
 	 * restore the original callee regs from main prog's stack frame.
 	 */
@@ -2736,6 +2739,8 @@ emit_jmp:
 					pop_r12(&prog);
 			}
 			EMIT1(0xC9);         /* leave */
+			bpf_prog->aux->ksym.fp_end = prog - temp;
+
 			emit_return(&prog, image + addrs[i - 1] + (prog - temp));
 			break;
 
@@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	}
 	EMIT1(0x55);		 /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+	if (im)
+		im->ksym.fp_start = prog - (u8 *)rw_image;
+
 	if (!is_imm8(stack_size)) {
 		/* sub rsp, stack_size */
 		EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
@@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 
 	emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
+
 	EMIT1(0xC9); /* leave */
+	if (im)
+		im->ksym.fp_end = prog - (u8 *)rw_image;
+
 	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
 		/* skip our return address and return to parent */
 		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
-- 
cgit v1.2.3


From 01bc3b6db18d6e0a2e93c37885996bf339bfe337 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@kernel.org>
Date: Wed, 3 Dec 2025 19:32:16 -0800
Subject: x86/unwind/orc: Support reliable unwinding through BPF stack frames

BPF JIT programs and trampolines use a frame pointer, so the current ORC
unwinder strategy of falling back to frame pointers (when an ORC entry
is missing) usually works in practice when unwinding through BPF JIT
stack frames.

However, that frame pointer fallback is just a guess, so the unwind gets
marked unreliable for live patching, which can cause livepatch
transition stalls.

Make the common case reliable by calling the bpf_has_frame_pointer()
helper to detect the valid frame pointer region of BPF JIT programs and
trampolines.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Reported-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Closes: https://lore.kernel.org/0e555733-c670-4e84-b2e6-abb8b84ade38@crowdstrike.com
Acked-by: Song Liu <song@kernel.org>
Acked-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Link: https://lore.kernel.org/r/a18505975662328c8ffb1090dded890c6f8c1004.1764818927.git.jpoimboe@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/kernel/unwind_orc.c | 39 +++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 977ee75e047c..f610fde2d5c4 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -2,6 +2,7 @@
 #include <linux/objtool.h>
 #include <linux/module.h>
 #include <linux/sort.h>
+#include <linux/bpf.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
@@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
 }
 #endif
 
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+	.type		= ORC_TYPE_CALL,
+	.sp_reg		= ORC_REG_BP,
+	.sp_offset	= 16,
+	.bp_reg		= ORC_REG_PREV_SP,
+	.bp_offset	= -16,
+};
+
+static struct orc_entry *orc_bpf_find(unsigned long ip)
+{
+#ifdef CONFIG_BPF_JIT
+	if (bpf_has_frame_pointer(ip))
+		return &orc_fp_entry;
+#endif
+
+	return NULL;
+}
+
 /*
  * If we crash with IP==0, the last successfully executed instruction
  * was probably an indirect function call with a NULL function pointer,
@@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = {
 	.type = ORC_TYPE_CALL
 };
 
-/* Fake frame pointer entry -- used as a fallback for generated code */
-static struct orc_entry orc_fp_entry = {
-	.type		= ORC_TYPE_CALL,
-	.sp_reg		= ORC_REG_BP,
-	.sp_offset	= 16,
-	.bp_reg		= ORC_REG_PREV_SP,
-	.bp_offset	= -16,
-};
-
 static struct orc_entry *orc_find(unsigned long ip)
 {
 	static struct orc_entry *orc;
@@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip)
 	if (orc)
 		return orc;
 
+	/* BPF lookup: */
+	orc = orc_bpf_find(ip);
+	if (orc)
+		return orc;
+
 	return orc_ftrace_find(ip);
 }
 
@@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (!orc) {
 		/*
 		 * As a fallback, try to assume this code uses a frame pointer.
-		 * This is useful for generated code, like BPF, which ORC
-		 * doesn't know about.  This is just a guess, so the rest of
-		 * the unwind is no longer considered reliable.
+		 * This is just a guess, so the rest of the unwind is no longer
+		 * considered reliable.
 		 */
 		orc = &orc_fp_entry;
 		state->error = true;
-- 
cgit v1.2.3


From c8161e5304abb26e6c0bec6efc947992500fa6c5 Mon Sep 17 00:00:00 2001
From: Yongxin Liu <yongxin.liu@windriver.com>
Date: Wed, 10 Dec 2025 08:02:20 +0800
Subject: x86/fpu: Fix FPU state core dump truncation on CPUs with no extended
 xfeatures

Zero can be a valid value of num_records. For example, on Intel Atom x6425RE,
only x87 and SSE are supported (features 0, 1), and fpu_user_cfg.max_features
is 3. The for_each_extended_xfeature() loop only iterates feature 2, which is
not enabled, so num_records = 0. This is valid and should not cause core dump
failure.

The issue is that dump_xsave_layout_desc() returns 0 for both genuine errors
(dump_emit() failure) and valid cases (no extended features). Use negative
return values for errors and only abort on genuine failures.

Fixes: ba386777a30b ("x86/elf: Add a new FPU buffer layout info to x86 core files")
Signed-off-by: Yongxin Liu <yongxin.liu@windriver.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251210000219.4094353-2-yongxin.liu@windriver.com
---
 arch/x86/kernel/fpu/xstate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 48113c5193aa..76153dfb58c9 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1946,7 +1946,7 @@ static int dump_xsave_layout_desc(struct coredump_params *cprm)
 		};
 
 		if (!dump_emit(cprm, &xc, sizeof(xc)))
-			return 0;
+			return -1;
 
 		num_records++;
 	}
@@ -1984,7 +1984,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm)
 		return 1;
 
 	num_records = dump_xsave_layout_desc(cprm);
-	if (!num_records)
+	if (num_records < 0)
 		return 1;
 
 	/* Total size should be equal to the number of records */
-- 
cgit v1.2.3


From 043507144ae13d3b882d40495d101bb4c4990d98 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Wed, 10 Dec 2025 13:56:28 +0100
Subject: x86/sgx: Remove unmatched quote in __sgx_encl_extend function comment

There is no opening quote. Remove the unmatched closing quote.

Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Link: https://patch.msgid.link/20251210125628.544916-1-thorsten.blum@linux.dev
---
 arch/x86/kernel/cpu/sgx/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 66f1efa16fbb..9322a9287dc7 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -242,7 +242,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
 /*
  * If the caller requires measurement of the page as a proof for the content,
  * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
- * operation until the entire page is measured."
+ * operation until the entire page is measured.
  */
 static int __sgx_encl_extend(struct sgx_encl *encl,
 			     struct sgx_epc_page *epc_page)
-- 
cgit v1.2.3


From 21433d3e3ca14d20f9b0c2237b3d3a1355af7907 Mon Sep 17 00:00:00 2001
From: Kyle Meyer <kyle.meyer@hpe.com>
Date: Fri, 12 Dec 2025 12:53:36 -0600
Subject: x86/platform/uv: Fix UBSAN array-index-out-of-bounds

When UBSAN is enabled, multiple array-index-out-of-bounds messages are
printed:

  [    0.000000] [     T0] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:276:23
  [    0.000000] [     T0] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.000000] [     T0] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:277:32
  [    0.000000] [     T0] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.000000] [     T0] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:282:16
  [    0.000000] [     T0] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.515850] [     T1] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:1344:23
  [    0.519851] [     T1] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.603850] [     T1] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:1345:32
  [    0.607850] [     T1] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.691850] [     T1] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:1353:20
  [    0.695850] [     T1] index 1 is out of range for type '<unknown> [1]'

One-element arrays have been deprecated:

  https://docs.kernel.org/process/deprecated.html#zero-length-and-one-element-arrays

Switch entry in struct uv_systab to a flexible array member to fix UBSAN
array-index-out-of-bounds messages.

sizeof(struct uv_systab) is passed to early_memremap() and ioremap(). The
flexible array member is not accessed until the UV system table size is used to
remap the entire UV system table, so changes to sizeof(struct uv_systab) have no
impact.

Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/aTxksN-3otY41WvQ@hpe.com
---
 arch/x86/include/asm/uv/bios.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 6989b824fd32..d0b62e255290 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -122,7 +122,7 @@ struct uv_systab {
 	struct {
 		u32 type:8;	/* type of entry */
 		u32 offset:24;	/* byte offset from struct start to entry */
-	} entry[1];		/* additional entries follow */
+	} entry[];		/* additional entries follow */
 };
 extern struct uv_systab *uv_systab;
 
-- 
cgit v1.2.3


From b1aa01d31249bd116b18c7f512d3e46b4b4ad83b Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Fri, 5 Dec 2025 10:58:57 +0100
Subject: s390/ipl: Clear SBP flag when bootprog is set

With z16 a new flag 'search boot program' was introduced for
list-directed IPL (SCSI, NVMe, ECKD DASD). If this flag is set,
e.g. via selecting the "Automatic" value for the "Boot program
selector" control on an HMC load panel, it is copied to the reipl
structure from the initial ipl structure. When a user now sets a
boot prog via sysfs, the flag is not cleared and the bootloader
will again automatically select the boot program, ignoring user
configuration.

To avoid that, clear the SBP flag when a bootprog sysfs file is
written.

Cc: stable@vger.kernel.org
Reviewed-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/uapi/asm/ipl.h |  1 +
 arch/s390/kernel/ipl.c           | 48 ++++++++++++++++++++++++++++++----------
 2 files changed, 37 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
index 2cd28af50dd4..3d64a2251699 100644
--- a/arch/s390/include/uapi/asm/ipl.h
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -15,6 +15,7 @@ struct ipl_pl_hdr {
 #define IPL_PL_FLAG_IPLPS	0x80
 #define IPL_PL_FLAG_SIPL	0x40
 #define IPL_PL_FLAG_IPLSR	0x20
+#define IPL_PL_FLAG_SBP		0x10
 
 /* IPL Parameter Block header */
 struct ipl_pb_hdr {
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 961a3d60a4dd..dcdc7e274848 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -262,6 +262,24 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
 			sys_##_prefix##_##_name##_show,			\
 			sys_##_prefix##_##_name##_store)
 
+#define DEFINE_IPL_ATTR_BOOTPROG_RW(_prefix, _name, _fmt_out, _fmt_in, _hdr, _value)	\
+	IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value)		\
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,			\
+		struct kobj_attribute *attr,						\
+		const char *buf, size_t len)						\
+{											\
+	unsigned long long value;							\
+	if (sscanf(buf, _fmt_in, &value) != 1)						\
+		return -EINVAL;								\
+	(_value) = value;								\
+	(_hdr).flags &= ~IPL_PL_FLAG_SBP;						\
+	return len;									\
+}											\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =				\
+	__ATTR(_name, 0644,								\
+			sys_##_prefix##_##_name##_show,					\
+			sys_##_prefix##_##_name##_store)
+
 #define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
 IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value)			\
 static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
@@ -818,12 +836,13 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
 		   reipl_block_fcp->fcp.wwpn);
 DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
 		   reipl_block_fcp->fcp.lun);
-DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
-		   reipl_block_fcp->fcp.bootprog);
 DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
 		   reipl_block_fcp->fcp.br_lba);
 DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
 		   reipl_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+			    reipl_block_fcp->hdr,
+			    reipl_block_fcp->fcp.bootprog);
 
 static void reipl_get_ascii_loadparm(char *loadparm,
 				     struct ipl_parameter_block *ibp)
@@ -942,10 +961,11 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
 		   reipl_block_nvme->nvme.fid);
 DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
 		   reipl_block_nvme->nvme.nsid);
-DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
-		   reipl_block_nvme->nvme.bootprog);
 DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
 		   reipl_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
+			    reipl_block_nvme->hdr,
+			    reipl_block_nvme->nvme.bootprog);
 
 static struct attribute *reipl_nvme_attrs[] = {
 	&sys_reipl_nvme_fid_attr.attr,
@@ -1038,8 +1058,9 @@ static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
 };
 
 DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd);
-DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
-		   reipl_block_eckd->eckd.bootprog);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
+			    reipl_block_eckd->hdr,
+			    reipl_block_eckd->eckd.bootprog);
 
 static struct attribute *reipl_eckd_attrs[] = {
 	&sys_reipl_eckd_device_attr.attr,
@@ -1567,12 +1588,13 @@ DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
 		   dump_block_fcp->fcp.wwpn);
 DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
 		   dump_block_fcp->fcp.lun);
-DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
-		   dump_block_fcp->fcp.bootprog);
 DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
 		   dump_block_fcp->fcp.br_lba);
 DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
 		   dump_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+			    dump_block_fcp->hdr,
+			    dump_block_fcp->fcp.bootprog);
 
 DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr,
 			    dump_block_fcp->fcp,
@@ -1604,10 +1626,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
 		   dump_block_nvme->nvme.fid);
 DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
 		   dump_block_nvme->nvme.nsid);
-DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
-		   dump_block_nvme->nvme.bootprog);
 DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
 		   dump_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
+			    dump_block_nvme->hdr,
+			    dump_block_nvme->nvme.bootprog);
 
 DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr,
 			    dump_block_nvme->nvme,
@@ -1635,8 +1658,9 @@ static const struct attribute_group dump_nvme_attr_group = {
 
 /* ECKD dump device attributes */
 DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd);
-DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
-		   dump_block_eckd->eckd.bootprog);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
+			    dump_block_eckd->hdr,
+			    dump_block_eckd->eckd.bootprog);
 
 IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd);
 IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);
-- 
cgit v1.2.3


From 4cb92fa763823d813d22b45b7f18fcf6e85a72ad Mon Sep 17 00:00:00 2001
From: Benjamin Block <bblock@linux.ibm.com>
Date: Fri, 5 Dec 2025 16:47:17 +0100
Subject: s390/pci: Fix cyclic dead-lock in zpci_zdev_put() and
 zpci_scan_devices()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When triggering PCI device recovery by writing into the SysFS attribute
`recover` of a Physical Function with existing child SR-IOV Virtual
Functions, lockdep is reporting a possible deadlock between three
threads:

         Thread (A)             Thread (B)             Thread (C)
             |                      |                      |
      recover_store()      zpci_scan_devices()    zpci_scan_devices()
lock(pci_rescan_remove_lock)        |                      |
             |                      |                      |
             |                      |            zpci_bus_scan_busses()
             |                      |             lock(zbus_list_lock)
             |              zpci_add_device()              |
             |          lock(zpci_add_remove_lock)         |
             |                      |                      ┴
             |                      |             zpci_bus_scan_bus()
             |                      |         lock(pci_rescan_remove_lock)
             ┴                      |
      zpci_zdev_put()               |
 lock(zpci_add_remove_lock)         |
                                    ┴
                              zpci_bus_get()
                           lock(zbus_list_lock)

In zpci_bus_scan_busses() the `zbus_list_lock` is taken for the whole
duration of the function, which also includes taking
`pci_rescan_remove_lock`, among other things. But `zbus_list_lock` only
really needs to protect the modification of the global registration
`zbus_list`, it can be dropped while the functions within the list
iteration run; this way we break the cycle above.

Break up zpci_bus_scan_busses() into an "iterator" zpci_bus_get_next()
that iterates over `zbus_list` element by element, and acquires and
releases `zbus_list_lock` as necessary, but never keeps holding it.
References to `zpci_bus` objects are also acquired and released.

The reference counting on `zpci_bus` objects is also changed so that all
put() and get() operations are done under the protection of
`zbus_list_lock`, and if the operation results in a modification of
`zbus_list`, this modification is done in the same critical section
(apart from the very first initialization). This way objects are never
seen on the list that are about to be released and/or half-initialized.

Fixes: 14c87ba8123a ("s390/pci: separate zbus registration from scanning")
Suggested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/pci/pci.c     |  6 ++-
 arch/s390/pci/pci_bus.c | 98 +++++++++++++++++++++++++++++++++++--------------
 arch/s390/pci/pci_bus.h | 15 +++++++-
 3 files changed, 90 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 5a6ace9d875a..8fd14d043008 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -1148,6 +1148,7 @@ static void zpci_add_devices(struct list_head *scan_list)
 
 int zpci_scan_devices(void)
 {
+	struct zpci_bus *zbus;
 	LIST_HEAD(scan_list);
 	int rc;
 
@@ -1156,7 +1157,10 @@ int zpci_scan_devices(void)
 		return rc;
 
 	zpci_add_devices(&scan_list);
-	zpci_bus_scan_busses();
+	zpci_bus_for_each(zbus) {
+		zpci_bus_scan_bus(zbus);
+		cond_resched();
+	}
 	return 0;
 }
 
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 66c4bd888b29..42a13e451f64 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -153,23 +153,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
 	return ret;
 }
 
-/* zpci_bus_scan_busses - Scan all registered busses
- *
- * Scan all available zbusses
- *
- */
-void zpci_bus_scan_busses(void)
-{
-	struct zpci_bus *zbus = NULL;
-
-	mutex_lock(&zbus_list_lock);
-	list_for_each_entry(zbus, &zbus_list, bus_next) {
-		zpci_bus_scan_bus(zbus);
-		cond_resched();
-	}
-	mutex_unlock(&zbus_list_lock);
-}
-
 static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
 {
 	return !s390_pci_no_rid && zdev->rid_available &&
@@ -222,10 +205,29 @@ out_free_domain:
 	return -ENOMEM;
 }
 
-static void zpci_bus_release(struct kref *kref)
+/**
+ * zpci_bus_release - Un-initialize resources associated with the zbus and
+ *		      free memory
+ * @kref:	refcount * that is part of struct zpci_bus
+ *
+ * MUST be called with `zbus_list_lock` held, but the lock is released during
+ * run of the function.
+ */
+static inline void zpci_bus_release(struct kref *kref)
+	__releases(&zbus_list_lock)
 {
 	struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
 
+	lockdep_assert_held(&zbus_list_lock);
+
+	list_del(&zbus->bus_next);
+	mutex_unlock(&zbus_list_lock);
+
+	/*
+	 * At this point no-one should see this object, or be able to get a new
+	 * reference to it.
+	 */
+
 	if (zbus->bus) {
 		pci_lock_rescan_remove();
 		pci_stop_root_bus(zbus->bus);
@@ -237,16 +239,19 @@ static void zpci_bus_release(struct kref *kref)
 		pci_unlock_rescan_remove();
 	}
 
-	mutex_lock(&zbus_list_lock);
-	list_del(&zbus->bus_next);
-	mutex_unlock(&zbus_list_lock);
 	zpci_remove_parent_msi_domain(zbus);
 	kfree(zbus);
 }
 
-static void zpci_bus_put(struct zpci_bus *zbus)
+static inline void __zpci_bus_get(struct zpci_bus *zbus)
+{
+	lockdep_assert_held(&zbus_list_lock);
+	kref_get(&zbus->kref);
+}
+
+static inline void zpci_bus_put(struct zpci_bus *zbus)
 {
-	kref_put(&zbus->kref, zpci_bus_release);
+	kref_put_mutex(&zbus->kref, zpci_bus_release, &zbus_list_lock);
 }
 
 static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
@@ -258,7 +263,7 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
 		if (!zbus->multifunction)
 			continue;
 		if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
-			kref_get(&zbus->kref);
+			__zpci_bus_get(zbus);
 			goto out_unlock;
 		}
 	}
@@ -268,6 +273,44 @@ out_unlock:
 	return zbus;
 }
 
+/**
+ * zpci_bus_get_next - get the next zbus object from given position in the list
+ * @pos:	current position/cursor in the global zbus list
+ *
+ * Acquires and releases references as the cursor iterates (might also free/
+ * release the cursor). Is tolerant of concurrent operations on the list.
+ *
+ * To begin the iteration, set *@pos to %NULL before calling the function.
+ *
+ * *@pos is set to %NULL in cases where either the list is empty, or *@pos is
+ * the last element in the list.
+ *
+ * Context: Process context. May sleep.
+ */
+void zpci_bus_get_next(struct zpci_bus **pos)
+{
+	struct zpci_bus *curp = *pos, *next = NULL;
+
+	mutex_lock(&zbus_list_lock);
+	if (curp)
+		next = list_next_entry(curp, bus_next);
+	else
+		next = list_first_entry(&zbus_list, typeof(*curp), bus_next);
+
+	if (list_entry_is_head(next, &zbus_list, bus_next))
+		next = NULL;
+
+	if (next)
+		__zpci_bus_get(next);
+
+	*pos = next;
+	mutex_unlock(&zbus_list_lock);
+
+	/* zpci_bus_put() might drop refcount to 0 and locks zbus_list_lock */
+	if (curp)
+		zpci_bus_put(curp);
+}
+
 static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 {
 	struct zpci_bus *zbus;
@@ -279,9 +322,6 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 	zbus->topo = topo;
 	zbus->topo_is_tid = topo_is_tid;
 	INIT_LIST_HEAD(&zbus->bus_next);
-	mutex_lock(&zbus_list_lock);
-	list_add_tail(&zbus->bus_next, &zbus_list);
-	mutex_unlock(&zbus_list_lock);
 
 	kref_init(&zbus->kref);
 	INIT_LIST_HEAD(&zbus->resources);
@@ -291,6 +331,10 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 	zbus->bus_resource.flags = IORESOURCE_BUS;
 	pci_add_resource(&zbus->resources, &zbus->bus_resource);
 
+	mutex_lock(&zbus_list_lock);
+	list_add_tail(&zbus->bus_next, &zbus_list);
+	mutex_unlock(&zbus_list_lock);
+
 	return zbus;
 }
 
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index ae3d7a9159bd..e440742e3145 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -15,7 +15,20 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
 void zpci_bus_device_unregister(struct zpci_dev *zdev);
 
 int zpci_bus_scan_bus(struct zpci_bus *zbus);
-void zpci_bus_scan_busses(void);
+void zpci_bus_get_next(struct zpci_bus **pos);
+
+/**
+ * zpci_bus_for_each - iterate over all the registered zbus objects
+ * @pos:	a struct zpci_bus * as cursor
+ *
+ * Acquires and releases references as the cursor iterates over the registered
+ * objects. Is tolerant against concurrent removals of objects.
+ *
+ * Context: Process context. May sleep.
+ */
+#define zpci_bus_for_each(pos)					     \
+	for ((pos) = NULL, zpci_bus_get_next(&(pos)); (pos) != NULL; \
+	     zpci_bus_get_next(&(pos)))
 
 int zpci_bus_scan_device(struct zpci_dev *zdev);
 void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
-- 
cgit v1.2.3


From af241e6bfc11125e6669dabf0800fce6809dd3cf Mon Sep 17 00:00:00 2001
From: Benjamin Block <bblock@linux.ibm.com>
Date: Fri, 5 Dec 2025 16:47:18 +0100
Subject: s390/pci: Annotate lock context imbalance in zpci_release_device()

When checking `arch/s390/pci/pci.c` with `sparse` during build, the
following complaint is reported:

  arch/s390/pci/pci.c: note: in included file (through include/linux/smp.h, include/linux/lockdep.h, include/linux/spinlock.h, include/linux/mmzone.h, include/linux/gfp.h, include/linux/slab.h):
  ./include/linux/list.h:237:25: warning: context imbalance in 'zpci_release_device' - unexpected unlock

But this is expected, as zpci_release_device() is expected to be called
with `zpci_list_lock` held, as part of `kref_put_lock()` or similar.

Reflect this by annotating the function with the appropriate
__releases().

Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Farhan Ali <alifm@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/pci/pci.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 8fd14d043008..57f3980b98a9 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -961,6 +961,7 @@ void zpci_device_reserved(struct zpci_dev *zdev)
 }
 
 void zpci_release_device(struct kref *kref)
+	__releases(&zpci_list_lock)
 {
 	struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
 
-- 
cgit v1.2.3


From 489e96651dfe59794195c6b2ddb78835edd9f2ed Mon Sep 17 00:00:00 2001
From: Jens Remus <jremus@linux.ibm.com>
Date: Thu, 11 Dec 2025 12:24:50 +0100
Subject: s390/stacktrace: Do not fallback to RA register

The logic to fall back to the return address (RA) register value in
the topmost frame when stack tracing using back chain is broken in
multiple ways:

When assuming the RA register 14 has not been saved yet one must assume
that a new user stack frame has not been allocated either.  Therefore
the back chain would not contain the stack pointer (SP) at entry, but
the caller's SP at its entry instead.

Therefore when falling back to the RA register 14 value it would also be
necessary to fall back to the SP register 15 value.  Otherwise an invalid
combination of RA register 14 and caller's SP at its entry (from the
back chain) is used.

In the topmost frame the back chain contains either the caller's SP at
its entry (before having allocated a new stack frame in the prologue),
the SP at entry (after having allocated a new stack frame), or an
uninitialized value (during static/dynamic stack allocation).  In both
cases where the back chain is valid either the caller or prologue must
have saved its respective RA to the respective frame.  Therefore, if the
RA obtained from the frame pointed to by the back chain is invalid, this
does not indicate that the IP in the topmost frame is still early in the
prologue and the RA has not been saved.

Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/stacktrace.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 3aae7f70e6ab..18520d333058 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -104,7 +104,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
 	struct stack_frame_vdso_wrapper __user *sf_vdso;
 	struct stack_frame_user __user *sf;
 	unsigned long ip, sp;
-	bool first = true;
 
 	if (!current->mm)
 		return;
@@ -133,24 +132,11 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
 			if (__get_user(ip, &sf->gprs[8]))
 				break;
 		}
-		/* Sanity check: ABI requires SP to be 8 byte aligned. */
-		if (sp & 0x7)
+		/* Validate SP and RA (ABI requires SP to be 8 byte aligned). */
+		if (sp & 0x7 || ip_invalid(ip))
 			break;
-		if (ip_invalid(ip)) {
-			/*
-			 * If the instruction address is invalid, and this
-			 * is the first stack frame, assume r14 has not
-			 * been written to the stack yet. Otherwise exit.
-			 */
-			if (!first)
-				break;
-			ip = regs->gprs[14];
-			if (ip_invalid(ip))
-				break;
-		}
 		if (!store_ip(consume_entry, cookie, entry, perf, ip))
 			break;
-		first = false;
 	}
 	pagefault_enable();
 }
-- 
cgit v1.2.3


From c4b502d60a71cf0c0c938f133dc4c0e2adc17b44 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 9 Dec 2025 06:48:49 +0100
Subject: arm64/simd: Avoid pointless clearing of FP/SIMD buffer

The buffer provided to kernel_neon_begin() is only used if the task is
scheduled out while the FP/SIMD is in use by the kernel, or when such a
section is interrupted by a softirq that also uses the FP/SIMD.

IOW, this happens rarely, and even if it happened often, there is still
no reason for this buffer to be cleared beforehand, which happens
unconditionally, due to the use of a compound literal expression.

So define that buffer variable explicitly, and mark it as
__uninitialized so that it will not get cleared, even when
-ftrivial-auto-var-init is in effect.

This requires some preprocessor gymnastics, due to the fact that the
variable must be defined throughout the entire guarded scope, and the
expression

  ({ struct user_fpsimd_state __uninitialized st; &st; })

is problematic in that regard, even though the compilers seem to
permit it. So instead, repeat the 'for ()' trick that is also used in
the implementation of the guarded scope helpers.

Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Fixes: 4fa617cc6851 ("arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack")
Link: https://lore.kernel.org/r/20251209054848.998878-2-ardb@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 arch/arm64/include/asm/simd.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 0941f6f58a14..69ecbd69ca8c 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -48,6 +48,13 @@ DEFINE_LOCK_GUARD_1(ksimd,
 		    kernel_neon_begin(_T->lock),
 		    kernel_neon_end(_T->lock))
 
-#define scoped_ksimd()	scoped_guard(ksimd, &(struct user_fpsimd_state){})
+#define __scoped_ksimd(_label)					\
+	for (struct user_fpsimd_state __uninitialized __st;	\
+	     true; ({ goto _label; }))				\
+		if (0) {					\
+_label:			break;					\
+		} else scoped_guard(ksimd, &__st)
+
+#define scoped_ksimd()	__scoped_ksimd(__UNIQUE_ID(label))
 
 #endif
-- 
cgit v1.2.3


From b7737c38e7cb611c2fbd87af3b09afeb92c96fe7 Mon Sep 17 00:00:00 2001
From: Kevin Brodsky <kevin.brodsky@arm.com>
Date: Wed, 19 Nov 2025 13:00:16 +0000
Subject: arm64: mm: Simplify check in arch_kfence_init_pool()

TL;DR: checking force_pte_mapping() in arch_kfence_init_pool() is
sufficient

Commit ce2b3a50ad92 ("arm64: mm: Don't sleep in
split_kernel_leaf_mapping() when in atomic context") recently added
an arm64 implementation of arch_kfence_init_pool() to ensure that
the KFENCE pool is PTE-mapped. Assuming that the pool was not
initialised early, block splitting is necessary if the linear
mapping is not fully PTE-mapped, in other words if
force_pte_mapping() is false.

arch_kfence_init_pool() currently makes another check: whether
BBML2-noabort is supported, i.e. whether we are *able* to split
block mappings. This check is however unnecessary, because
force_pte_mapping() is always true if KFENCE is enabled and
BBML2-noabort is not supported. This must be the case by design,
since KFENCE requires PTE-mapped pages in all cases. We can
therefore remove that check.

The situation is different in split_kernel_leaf_mapping(), as that
function is called unconditionally regardless of the configuration.
If BBML2-noabort is not supported, it cannot do anything and bails
out. If force_pte_mapping() is true, there is nothing to do and it
also bails out, but these are independent checks.

Commit 53357f14f924 ("arm64: mm: Tidy up force_pte_mapping()")
grouped these checks into a helper, split_leaf_mapping_possible().
This isn't so helpful as only split_kernel_leaf_mapping() should
check both. Revert the parts of that commit that introduced the
helper, reintroducing the more accurate comments in
split_kernel_leaf_mapping().

Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/mmu.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9ae7ce00a7ef..8e1d80a7033e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -767,18 +767,6 @@ static inline bool force_pte_mapping(void)
 	return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world();
 }
 
-static inline bool split_leaf_mapping_possible(void)
-{
-	/*
-	 * !BBML2_NOABORT systems should never run into scenarios where we would
-	 * have to split. So exit early and let calling code detect it and raise
-	 * a warning.
-	 */
-	if (!system_supports_bbml2_noabort())
-		return false;
-	return !force_pte_mapping();
-}
-
 static DEFINE_MUTEX(pgtable_split_lock);
 
 int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
@@ -786,11 +774,22 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
 	int ret;
 
 	/*
-	 * Exit early if the region is within a pte-mapped area or if we can't
-	 * split. For the latter case, the permission change code will raise a
-	 * warning if not already pte-mapped.
+	 * !BBML2_NOABORT systems should not be trying to change permissions on
+	 * anything that is not pte-mapped in the first place. Just return early
+	 * and let the permission change code raise a warning if not already
+	 * pte-mapped.
 	 */
-	if (!split_leaf_mapping_possible() || is_kfence_address((void *)start))
+	if (!system_supports_bbml2_noabort())
+		return 0;
+
+	/*
+	 * If the region is within a pte-mapped area, there is no need to try to
+	 * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may
+	 * change permissions from atomic context so for those cases (which are
+	 * always pte-mapped), we must not go any further because taking the
+	 * mutex below may sleep.
+	 */
+	if (force_pte_mapping() || is_kfence_address((void *)start))
 		return 0;
 
 	/*
@@ -1089,7 +1088,7 @@ bool arch_kfence_init_pool(void)
 	int ret;
 
 	/* Exit early if we know the linear map is already pte-mapped. */
-	if (!split_leaf_mapping_possible())
+	if (force_pte_mapping())
 		return true;
 
 	/* Kfence pool is already pte-mapped for the early init case. */
-- 
cgit v1.2.3


From 63de2b3859ba1def9f43ed0a9c25a68810208e5c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sat, 6 Dec 2025 20:01:17 +0100
Subject: arm64/efi: Remove unneeded SVE/SME fallback preserve/store handling

Since commit 7137a203b251 ("arm64/fpsimd: Permit kernel mode NEON with
IRQs off"), the only condition under which the fallback path is taken
for FP/SIMD preserve/restore across an EFI runtime call is when it is
called from hardirq or NMI context.

In practice, this only happens when the EFI pstore driver is called to
dump the kernel log buffer into an EFI variable under a panic, oops or
emergency_restart() condition, and none of these can be expected to
result in a return to user space for the task in question.

This means that the existing EFI-specific logic for preserving and
restoring SVE/SME state is pointless, and can be removed.

Instead, kill the task, so that an exceedingly unlikely inadvertent
return to user space does not proceed with a corrupted FP/SIMD state.
Also, retain the preserve and restore of the base FP/SIMD state, as that
might belong to kernel mode use of FP/SIMD. (Note that EFI runtime calls
are never invoked reentrantly, even in this case, and so any interrupted
kernel mode FP/SIMD usage will be unrelated to EFI)

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 130 +++++++--------------------------------------
 1 file changed, 20 insertions(+), 110 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c154f72634e0..9de1d8a604cb 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -180,13 +180,6 @@ static inline void set_sve_default_vl(int val)
 	set_default_vl(ARM64_VEC_SVE, val);
 }
 
-static u8 *efi_sve_state;
-
-#else /* ! CONFIG_ARM64_SVE */
-
-/* Dummy declaration for code that will be optimised out: */
-extern u8 *efi_sve_state;
-
 #endif /* ! CONFIG_ARM64_SVE */
 
 #ifdef CONFIG_ARM64_SME
@@ -1095,36 +1088,6 @@ int vec_verify_vq_map(enum vec_type type)
 	return 0;
 }
 
-static void __init sve_efi_setup(void)
-{
-	int max_vl = 0;
-	int i;
-
-	if (!IS_ENABLED(CONFIG_EFI))
-		return;
-
-	for (i = 0; i < ARRAY_SIZE(vl_info); i++)
-		max_vl = max(vl_info[i].max_vl, max_vl);
-
-	/*
-	 * alloc_percpu() warns and prints a backtrace if this goes wrong.
-	 * This is evidence of a crippled system and we are returning void,
-	 * so no attempt is made to handle this situation here.
-	 */
-	if (!sve_vl_valid(max_vl))
-		goto fail;
-
-	efi_sve_state = kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)),
-				GFP_KERNEL);
-	if (!efi_sve_state)
-		goto fail;
-
-	return;
-
-fail:
-	panic("Cannot allocate memory for EFI SVE save/restore");
-}
-
 void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
 {
 	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
@@ -1185,8 +1148,6 @@ void __init sve_setup(void)
 	if (sve_max_virtualisable_vl() < sve_max_vl())
 		pr_warn("%s: unvirtualisable vector lengths present\n",
 			info->name);
-
-	sve_efi_setup();
 }
 
 /*
@@ -1947,9 +1908,6 @@ EXPORT_SYMBOL_GPL(kernel_neon_end);
 #ifdef CONFIG_EFI
 
 static struct user_fpsimd_state efi_fpsimd_state;
-static bool efi_fpsimd_state_used;
-static bool efi_sve_state_used;
-static bool efi_sm_state;
 
 /*
  * EFI runtime services support functions
@@ -1976,43 +1934,26 @@ void __efi_fpsimd_begin(void)
 	if (may_use_simd()) {
 		kernel_neon_begin(&efi_fpsimd_state);
 	} else {
-		WARN_ON(preemptible());
-
 		/*
-		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
-		 * preserving:
+		 * We are running in hardirq or NMI context, and the only
+		 * legitimate case where this might happen is when EFI pstore
+		 * is attempting to record the system's dying gasps into EFI
+		 * variables. This could be due to an oops, a panic or a call
+		 * to emergency_restart(), and in none of those cases, we can
+		 * expect the current task to ever return to user space again,
+		 * or for the kernel to resume any normal execution, for that
+		 * matter (an oops in hardirq context triggers a panic too).
+		 *
+		 * Therefore, there is no point in attempting to preserve any
+		 * SVE/SME state here. On the off chance that we might have
+		 * ended up here for a different reason inadvertently, kill the
+		 * task and preserve/restore the base FP/SIMD state, which
+		 * might belong to kernel mode FP/SIMD.
 		 */
-		if (system_supports_sve() && efi_sve_state != NULL) {
-			bool ffr = true;
-			u64 svcr;
-
-			efi_sve_state_used = true;
-
-			if (system_supports_sme()) {
-				svcr = read_sysreg_s(SYS_SVCR);
-
-				efi_sm_state = svcr & SVCR_SM_MASK;
-
-				/*
-				 * Unless we have FA64 FFR does not
-				 * exist in streaming mode.
-				 */
-				if (!system_supports_fa64())
-					ffr = !(svcr & SVCR_SM_MASK);
-			}
-
-			sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
-				       &efi_fpsimd_state.fpsr, ffr);
-
-			if (system_supports_sme())
-				sysreg_clear_set_s(SYS_SVCR,
-						   SVCR_SM_MASK, 0);
-
-		} else {
-			fpsimd_save_state(&efi_fpsimd_state);
-		}
-
-		efi_fpsimd_state_used = true;
+		pr_warn_ratelimited("Calling EFI runtime from %s context\n",
+				    in_nmi() ? "NMI" : "hardirq");
+		force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+		fpsimd_save_state(&efi_fpsimd_state);
 	}
 }
 
@@ -2024,41 +1965,10 @@ void __efi_fpsimd_end(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	if (!efi_fpsimd_state_used) {
+	if (may_use_simd()) {
 		kernel_neon_end(&efi_fpsimd_state);
 	} else {
-		if (system_supports_sve() && efi_sve_state_used) {
-			bool ffr = true;
-
-			/*
-			 * Restore streaming mode; EFI calls are
-			 * normal function calls so should not return in
-			 * streaming mode.
-			 */
-			if (system_supports_sme()) {
-				if (efi_sm_state) {
-					sysreg_clear_set_s(SYS_SVCR,
-							   0,
-							   SVCR_SM_MASK);
-
-					/*
-					 * Unless we have FA64 FFR does not
-					 * exist in streaming mode.
-					 */
-					if (!system_supports_fa64())
-						ffr = false;
-				}
-			}
-
-			sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
-				       &efi_fpsimd_state.fpsr, ffr);
-
-			efi_sve_state_used = false;
-		} else {
-			fpsimd_load_state(&efi_fpsimd_state);
-		}
-
-		efi_fpsimd_state_used = false;
+		fpsimd_load_state(&efi_fpsimd_state);
 	}
 }
 
-- 
cgit v1.2.3


From 98a97bf41528ef738b06eb07ec2b2eb1cfde6ce6 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Sat, 29 Nov 2025 00:48:45 +0000
Subject: arm64/gcs: Flush the GCS locking state on exec

When we exec a new task we forget to flush the set of locked GCS mode bits.
Since we do flush the rest of the state this means that if GCS is locked
the new task will be unable to enable GCS, it will be locked as being
disabled. Add the expected flush.

Fixes: fc84bc5378a8 ("arm64/gcs: Context switch GCS state for EL0")
Cc: <stable@vger.kernel.org> # 6.13.x
Reported-by: Yury Khrustalev <Yury.Khrustalev@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Tested-by: Yury Khrustalev <yury.khrustalev@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fba7ca102a8c..489554931231 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -292,6 +292,7 @@ static void flush_gcs(void)
 	current->thread.gcs_base = 0;
 	current->thread.gcs_size = 0;
 	current->thread.gcs_el0_mode = 0;
+	current->thread.gcs_el0_locked = 0;
 	write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
 	write_sysreg_s(0, SYS_GCSPR_EL0);
 }
-- 
cgit v1.2.3


From bd94fbe8b55f38c24a63cca2854ff74b62780d77 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 29 Oct 2025 16:03:16 +0100
Subject: MIPS: Alchemy: Remove bogus static/inline specifiers

The recent io_remap_pfn_range() rework applied the static and inline
specifiers to the implementation of io_remap_pfn_range_pfn() on MIPS
Alchemy, mirroring the same change on other platforms. However, this
function is defined in a source file and that definition causes a
conflict with its declaration. Fix this by dropping the specifiers.

Fixes: c707a68f9468 ("mm: abstract io_remap_pfn_range() based on PFN")
Signed-off-by: Thierry Reding <treding@nvidia.com>
Acked-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/alchemy/common/setup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index c35b4f809d51..992134a8c23a 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -94,8 +94,7 @@ phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
 	return phys_addr;
 }
 
-static inline unsigned long io_remap_pfn_range_pfn(unsigned long pfn,
-		unsigned long size)
+unsigned long io_remap_pfn_range_pfn(unsigned long pfn, unsigned long size)
 {
 	phys_addr_t phys_addr = fixup_bigphys_addr(pfn << PAGE_SHIFT, size);
 
-- 
cgit v1.2.3


From 680ad315caaa2860df411cb378bf3614d96c7648 Mon Sep 17 00:00:00 2001
From: Haoxiang Li <haoxiang_li2024@163.com>
Date: Thu, 4 Dec 2025 18:36:18 +0800
Subject: MIPS: Fix a reference leak bug in ip22_check_gio()

If gio_device_register fails, gio_dev_put() is required to
drop the gio_dev device reference.

Fixes: e84de0c61905 ("MIPS: GIO bus support for SGI IP22/28")
Signed-off-by: Haoxiang Li <haoxiang_li2024@163.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/sgi-ip22/ip22-gio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c
index 5893ea4e382c..19b70928d6dc 100644
--- a/arch/mips/sgi-ip22/ip22-gio.c
+++ b/arch/mips/sgi-ip22/ip22-gio.c
@@ -372,7 +372,8 @@ static void ip22_check_gio(int slotno, unsigned long addr, int irq)
 		gio_dev->resource.flags = IORESOURCE_MEM;
 		gio_dev->irq = irq;
 		dev_set_name(&gio_dev->dev, "%d", slotno);
-		gio_device_register(gio_dev);
+		if (gio_device_register(gio_dev))
+			gio_dev_put(gio_dev);
 	} else
 		printk(KERN_INFO "GIO: slot %d : Empty\n", slotno);
 }
-- 
cgit v1.2.3


From e5aff444e3a7bdeef5ea796a2099fc3c60a070fa Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 15 Dec 2025 12:51:12 +0100
Subject: x86/xen: Fix sparse warning in enlighten_pv.c

The sparse tool issues a warning for arch/x86/xen/enlighten_pv.c:

   arch/x86/xen/enlighten_pv.c:120:9: sparse: sparse: incorrect type
     in initializer (different address spaces)
     expected void const [noderef] __percpu *__vpp_verify
     got bool *

This is due to the percpu variable xen_in_preemptible_hcall being
exported via EXPORT_SYMBOL_GPL() instead of EXPORT_PER_CPU_SYMBOL_GPL().

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512140856.Ic6FetG6-lkp@intel.com/
Fixes: fdfd811ddde3 ("x86/xen: allow privcmd hypercalls to be preempted")
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20251215115112.15072-1-jgross@suse.com>
---
 arch/x86/xen/enlighten_pv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4806cc28d7ca..b74ff8bc7f2a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -108,7 +108,7 @@ static int xen_cpu_dead_pv(unsigned int cpu);
  * calls.
  */
 DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
-EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+EXPORT_PER_CPU_SYMBOL_GPL(xen_in_preemptible_hcall);
 
 /*
  * In case of scheduling the flag must be cleared and restored after
-- 
cgit v1.2.3


From 0edc78b82bea85e1b2165d8e870a5c3535919695 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 25 Nov 2025 22:50:45 +0100
Subject: x86/msi: Make irq_retrigger() functional for posted MSI

Luigi reported that retriggering a posted MSI interrupt does not work
correctly.

The reason is that the retrigger happens at the vector domain by sending an
IPI to the actual vector on the target CPU. That works correctly exactly
once because the posted MSI interrupt chip does not issue an EOI as that's
only required for the posted MSI notification vector itself.

As a consequence the vector becomes stale in the ISR, which not only
affects this vector but also any lower priority vector in the affected
APIC because the ISR bit is not cleared.

Luigi proposed to set the vector in the remap PIR bitmap and raise the
posted MSI notification vector. That works, but that still does not cure a
related problem:

  If there is ever a stray interrupt on such a vector, then the related
  APIC ISR bit becomes stale due to the lack of EOI as described above.
  Unlikely to happen, but if it happens it's not debuggable at all.

So instead of playing games with the PIR, this can be actually solved
for both cases by:

 1) Keeping track of the posted interrupt vector handler state

 2) Implementing a posted MSI specific irq_ack() callback which checks that
    state. If the posted vector handler is inactive it issues an EOI,
    otherwise it delegates that to the posted handler.

This is correct versus affinity changes and concurrent events on the posted
vector as the actual handler invocation is serialized through the interrupt
descriptor lock.

Fixes: ed1e48ea4370 ("iommu/vt-d: Enable posted mode for device MSIs")
Reported-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Luigi Rizzo <lrizzo@google.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251125214631.044440658@linutronix.de
Closes: https://lore.kernel.org/lkml/20251124104836.3685533-1-lrizzo@google.com
---
 arch/x86/include/asm/irq_remapping.h |  7 +++++++
 arch/x86/kernel/irq.c                | 23 +++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5a0d42464d44..4e55d1755846 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(const char *msg)
 }
 
 #endif /* CONFIG_IRQ_REMAP */
+
+#ifdef CONFIG_X86_POSTED_MSI
+void intel_ack_posted_msi_irq(struct irq_data *irqd);
+#else
+#define intel_ack_posted_msi_irq	NULL
+#endif
+
 #endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 86f4e574de02..b2fe6181960c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
 
 /* Posted Interrupt Descriptors for coalesced MSIs to be posted */
 DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);
 
 void intel_posted_msi_init(void)
 {
@@ -414,6 +415,25 @@ void intel_posted_msi_init(void)
 	this_cpu_write(posted_msi_pi_desc.ndst, destination);
 }
 
+void intel_ack_posted_msi_irq(struct irq_data *irqd)
+{
+	irq_move_irq(irqd);
+
+	/*
+	 * Handle the rare case that irq_retrigger() raised the actual
+	 * assigned vector on the target CPU, which means that it was not
+	 * invoked via the posted MSI handler below. In that case APIC EOI
+	 * is required as otherwise the ISR entry becomes stale and lower
+	 * priority interrupts are never going to be delivered after that.
+	 *
+	 * If the posted handler invoked the device interrupt handler then
+	 * the EOI would be premature because it would acknowledge the
+	 * posted vector.
+	 */
+	if (unlikely(!__this_cpu_read(posted_msi_handler_active)))
+		apic_eoi();
+}
+
 static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
 {
 	unsigned long pir_copy[NR_PIR_WORDS];
@@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
 
 	pid = this_cpu_ptr(&posted_msi_pi_desc);
 
+	/* Mark the handler active for intel_ack_posted_msi_irq() */
+	__this_cpu_write(posted_msi_handler_active, true);
 	inc_irq_stat(posted_msi_notification_count);
 	irq_enter();
 
@@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
 
 	apic_eoi();
 	irq_exit();
+	__this_cpu_write(posted_msi_handler_active, false);
 	set_irq_regs(old_regs);
 }
 #endif /* X86_POSTED_MSI */
-- 
cgit v1.2.3


From c56a12c71ad38f381105f6e5036dede64ad2dfee Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 18 Dec 2025 11:47:38 +0100
Subject: x86/bug: Fix old GCC compile fails

For some mysterious reasons the GCC 8 and 9 preprocessor manages to
sporadically fumble _ASM_BYTES(0x0f, 0x0b):

$ grep ".byte[ ]*0x0f" defconfig-build/drivers/net/wireless/realtek/rtlwifi/base.s
        1:       .byte0x0f,0x0b ;
        1:       .byte 0x0f,0x0b ;

which makes the assembler upset and all that. While there are more
_ASM_BYTES() users (notably the NOP instructions), those don't seem
affected. Therefore replace the offending ASM_UD2 with one using the
ud2 mnemonic.

Reported-by: Jean Delvare <jdelvare@suse.de>
Suggested-by: Uros Bizjak <ubizjak@gmail.com>
Fixes: 85a2d4a890dc ("x86,ibt: Use UDB instead of 0xEA")
Cc: stable@kernel.org
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251218104659.GT3911114@noisy.programming.kicks-ass.net
---
 arch/x86/include/asm/bug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index ee23b98353d7..40de5796adb5 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -15,7 +15,7 @@ extern void __WARN_trap(struct bug_entry *bug, ...);
 /*
  * Despite that some emulators terminate on UD2, we use it for WARN().
  */
-#define ASM_UD2		_ASM_BYTES(0x0f, 0x0b)
+#define ASM_UD2		__ASM_FORM(ud2)
 #define INSN_UD2	0x0b0f
 #define LEN_UD2		2
 
-- 
cgit v1.2.3


From 818d78ba1b3f88d2bfee249f25020211488a26c3 Mon Sep 17 00:00:00 2001
From: Andy Chiu <andybnac@gmail.com>
Date: Wed, 12 Nov 2025 16:43:14 -0800
Subject: riscv: signal: abstract header saving for setup_sigcontext

The function save_v_state() served two purposes. First, it saved
extension context into the signal stack. Then, it constructed the
extension header if there was no fault. The second part is independent
of the extension itself. As a result, we can pull that part out, so
future extensions may reuse it. This patch adds arch_ext_list and makes
setup_sigcontext() go through all possible extensions' save() callback.
The callback returns a positive value indicating the size of the
successfully saved extension. Then the kernel proceeds to construct the
header for that extension. The kernel skips an extension if it does
not exist, or if the saving fails for some reason. The error code is
propagated out in the latter case.

This patch does not introduce any functional changes.

Signed-off-by: Andy Chiu <andybnac@gmail.com>
Link: https://patch.msgid.link/20251112-v5_user_cfi_series-v23-16-b55691eacf4f@rivosinc.com
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 arch/riscv/include/asm/vector.h |  3 ++
 arch/riscv/kernel/signal.c      | 62 +++++++++++++++++++++++++++--------------
 2 files changed, 44 insertions(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index e7aa449368ad..00cb9c0982b1 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -424,6 +424,9 @@ static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
 #define riscv_v_thread_free(tsk)		do {} while (0)
 #define  riscv_v_setup_ctx_cache()		do {} while (0)
 #define riscv_v_thread_alloc(tsk)		do {} while (0)
+#define get_cpu_vector_context()		do {} while (0)
+#define put_cpu_vector_context()		do {} while (0)
+#define riscv_v_vstate_set_restore(task, regs)	do {} while (0)
 
 #endif /* CONFIG_RISCV_ISA_V */
 
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 08378fea3a11..5a956108b1ea 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -68,18 +68,19 @@ static long save_fp_state(struct pt_regs *regs,
 #define restore_fp_state(task, regs) (0)
 #endif
 
-#ifdef CONFIG_RISCV_ISA_V
-
-static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
+static long save_v_state(struct pt_regs *regs, void __user *sc_vec)
 {
-	struct __riscv_ctx_hdr __user *hdr;
 	struct __sc_riscv_v_state __user *state;
 	void __user *datap;
 	long err;
 
-	hdr = *sc_vec;
-	/* Place state to the user's signal context space after the hdr */
-	state = (struct __sc_riscv_v_state __user *)(hdr + 1);
+	if (!IS_ENABLED(CONFIG_RISCV_ISA_V) ||
+	    !((has_vector() || has_xtheadvector()) &&
+	    riscv_v_vstate_query(regs)))
+		return 0;
+
+	/* Place state to the user's signal context space */
+	state = (struct __sc_riscv_v_state __user *)sc_vec;
 	/* Point datap right after the end of __sc_riscv_v_state */
 	datap = state + 1;
 
@@ -97,15 +98,11 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
 	err |= __put_user((__force void *)datap, &state->v_state.datap);
 	/* Copy the whole vector content to user space datap. */
 	err |= __copy_to_user(datap, current->thread.vstate.datap, riscv_v_vsize);
-	/* Copy magic to the user space after saving  all vector conetext */
-	err |= __put_user(RISCV_V_MAGIC, &hdr->magic);
-	err |= __put_user(riscv_v_sc_size, &hdr->size);
 	if (unlikely(err))
-		return err;
+		return -EFAULT;
 
-	/* Only progress the sv_vec if everything has done successfully  */
-	*sc_vec += riscv_v_sc_size;
-	return 0;
+	/* Only return the size if everything has done successfully  */
+	return riscv_v_sc_size;
 }
 
 /*
@@ -142,10 +139,20 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
 	 */
 	return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
 }
-#else
-#define save_v_state(task, regs) (0)
-#define __restore_v_state(task, regs) (0)
-#endif
+
+struct arch_ext_priv {
+	__u32 magic;
+	long (*save)(struct pt_regs *regs, void __user *sc_vec);
+};
+
+struct arch_ext_priv arch_ext_list[] = {
+	{
+		.magic = RISCV_V_MAGIC,
+		.save = &save_v_state,
+	},
+};
+
+const size_t nr_arch_exts = ARRAY_SIZE(arch_ext_list);
 
 static long restore_sigcontext(struct pt_regs *regs,
 	struct sigcontext __user *sc)
@@ -270,7 +277,8 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
 {
 	struct sigcontext __user *sc = &frame->uc.uc_mcontext;
 	struct __riscv_ctx_hdr __user *sc_ext_ptr = &sc->sc_extdesc.hdr;
-	long err;
+	struct arch_ext_priv *arch_ext;
+	long err, i, ext_size;
 
 	/* sc_regs is structured the same as the start of pt_regs */
 	err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
@@ -278,8 +286,20 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
 	if (has_fpu())
 		err |= save_fp_state(regs, &sc->sc_fpregs);
 	/* Save the vector state. */
-	if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs))
-		err |= save_v_state(regs, (void __user **)&sc_ext_ptr);
+	for (i = 0; i < nr_arch_exts; i++) {
+		arch_ext = &arch_ext_list[i];
+		if (!arch_ext->save)
+			continue;
+
+		ext_size = arch_ext->save(regs, sc_ext_ptr + 1);
+		if (ext_size <= 0) {
+			err |= ext_size;
+		} else {
+			err |= __put_user(arch_ext->magic, &sc_ext_ptr->magic);
+			err |= __put_user(ext_size, &sc_ext_ptr->size);
+			sc_ext_ptr = (void *)sc_ext_ptr + ext_size;
+		}
+	}
 	/* Write zero to fp-reserved space and check it on restore_sigcontext */
 	err |= __put_user(0, &sc->sc_extdesc.reserved);
 	/* And put END __riscv_ctx_hdr at the end. */
-- 
cgit v1.2.3


From 1e6084d5c433b142b18d57694a6ab555ca6bb8cc Mon Sep 17 00:00:00 2001
From: Paul Walmsley <pjw@kernel.org>
Date: Mon, 17 Nov 2025 21:19:27 -0700
Subject: riscv: mm: pmdp_huge_get_and_clear(): avoid atomic ops when
 !CONFIG_SMP

When !CONFIG_SMP, there's no need for atomic operations in
pmdp_huge_get_and_clear(), so, similar to what x86 does, let's not use
atomics in this case.  See also commit 546e42c8c6d94 ("riscv: Use an
atomic xchg in pudp_huge_get_and_clear()").

Cc: Alexandre Ghiti <alex@ghiti.fr>
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 arch/riscv/include/asm/pgtable.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 8bd36ac842eb..1df8a6adb407 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -997,7 +997,13 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 					unsigned long address, pmd_t *pmdp)
 {
+#ifdef CONFIG_SMP
 	pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0));
+#else
+	pmd_t pmd = *pmdp;
+
+	pmd_clear(pmdp);
+#endif
 
 	page_table_check_pmd_clear(mm, pmd);
 
-- 
cgit v1.2.3


From 425cc087fbaf267be7683b95481b46a058d63e49 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <pjw@kernel.org>
Date: Mon, 17 Nov 2025 21:19:27 -0700
Subject: riscv: mm: ptep_get_and_clear(): avoid atomic ops when !CONFIG_SMP

When !CONFIG_SMP, there's no need for atomic operations in
ptep_get_and_clear(), so, similar to x86, let's not use atomics in
this case.

Cc: Alexandre Ghiti <alex@ghiti.fr>
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 arch/riscv/include/asm/pgtable.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 1df8a6adb407..ebab8ecd78b2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -660,7 +660,13 @@ extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long a
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long address, pte_t *ptep)
 {
+#ifdef CONFIG_SMP
 	pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));
+#else
+	pte_t pte = *ptep;
+
+	set_pte(ptep, __pte(0));
+#endif
 
 	page_table_check_pte_clear(mm, pte);
 
-- 
cgit v1.2.3


From e0e51a0de02cf0e5008d0e167288ad1598005b9e Mon Sep 17 00:00:00 2001
From: Paul Walmsley <pjw@kernel.org>
Date: Mon, 17 Nov 2025 21:19:28 -0700
Subject: riscv: mm: use xchg() on non-atomic_long_t variables, not
 atomic_long_xchg()

Let's not call atomic_long_xchg() on something that's not an
atomic_long_t, and just use xchg() instead.  Continues the cleanup
from commit 546e42c8c6d94 ("riscv: Use an atomic xchg in
pudp_huge_get_and_clear()").

Cc: Alexandre Ghiti <alex@ghiti.fr>
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 arch/riscv/include/asm/pgtable.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index ebab8ecd78b2..6bb1f5bdc5d2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -661,7 +661,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long address, pte_t *ptep)
 {
 #ifdef CONFIG_SMP
-	pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));
+	pte_t pte = __pte(xchg(&ptep->pte, 0));
 #else
 	pte_t pte = *ptep;
 
@@ -1004,7 +1004,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 					unsigned long address, pmd_t *pmdp)
 {
 #ifdef CONFIG_SMP
-	pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0));
+	pmd_t pmd = __pmd(xchg(&pmdp->pmd, 0));
 #else
 	pmd_t pmd = *pmdp;
 
-- 
cgit v1.2.3


From 3f0cbfb8a107a9f0a6e2184425b70ddc6d51f991 Mon Sep 17 00:00:00 2001
From: Pincheng Wang <pincheng.plct@isrc.iscas.ac.cn>
Date: Wed, 27 Aug 2025 00:29:36 +0800
Subject: riscv: add ISA extension parsing for Zilsd and Zclsd

Add parsing for Zilsd and Zclsd ISA extensions which were ratified in
commit f88abf1 ("Integrating load/store pair for RV32 with the
main manual") of the riscv-isa-manual.

Signed-off-by: Pincheng Wang <pincheng.plct@isrc.iscas.ac.cn>
Reviewed-by: Nutty Liu <nutty.liu@hotmail.com>
Link: https://patch.msgid.link/20250826162939.1494021-3-pincheng.plct@isrc.iscas.ac.cn
[pjw@kernel.org: cleaned up checkpatch issues, whitespace; updated to apply]
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 arch/riscv/include/asm/hwcap.h |  2 ++
 arch/riscv/kernel/cpufeature.c | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index dfe57b215e6c..4369a2338541 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -108,6 +108,8 @@
 #define RISCV_ISA_EXT_ZICBOP		99
 #define RISCV_ISA_EXT_SVRSW60T59B	100
 #define RISCV_ISA_EXT_ZALASR		101
+#define RISCV_ISA_EXT_ZILSD		102
+#define RISCV_ISA_EXT_ZCLSD		103
 
 #define RISCV_ISA_EXT_XLINUXENVCFG	127
 
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index b057362f8fb5..c05b11596c19 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -242,6 +242,28 @@ static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data,
 	return -EPROBE_DEFER;
 }
 
+static int riscv_ext_zilsd_validate(const struct riscv_isa_ext_data *data,
+				    const unsigned long *isa_bitmap)
+{
+	if (IS_ENABLED(CONFIG_64BIT))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int riscv_ext_zclsd_validate(const struct riscv_isa_ext_data *data,
+				    const unsigned long *isa_bitmap)
+{
+	if (IS_ENABLED(CONFIG_64BIT))
+		return -EINVAL;
+
+	if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZILSD) &&
+	    __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA))
+		return 0;
+
+	return -EPROBE_DEFER;
+}
+
 static int riscv_vector_f_validate(const struct riscv_isa_ext_data *data,
 				   const unsigned long *isa_bitmap)
 {
@@ -484,6 +506,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
 	__RISCV_ISA_EXT_DATA_VALIDATE(zcd, RISCV_ISA_EXT_ZCD, riscv_ext_zcd_validate),
 	__RISCV_ISA_EXT_DATA_VALIDATE(zcf, RISCV_ISA_EXT_ZCF, riscv_ext_zcf_validate),
 	__RISCV_ISA_EXT_DATA_VALIDATE(zcmop, RISCV_ISA_EXT_ZCMOP, riscv_ext_zca_depends),
+	__RISCV_ISA_EXT_DATA_VALIDATE(zclsd, RISCV_ISA_EXT_ZCLSD, riscv_ext_zclsd_validate),
+	__RISCV_ISA_EXT_DATA_VALIDATE(zilsd, RISCV_ISA_EXT_ZILSD, riscv_ext_zilsd_validate),
 	__RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA),
 	__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
 	__RISCV_ISA_EXT_DATA(zbc, RISCV_ISA_EXT_ZBC),
-- 
cgit v1.2.3


From 6118ebed3bdf896038f58d0d1804f551f33e8643 Mon Sep 17 00:00:00 2001
From: Pincheng Wang <pincheng.plct@isrc.iscas.ac.cn>
Date: Wed, 27 Aug 2025 00:29:37 +0800
Subject: riscv: hwprobe: export Zilsd and Zclsd ISA extensions

Export Zilsd and Zclsd ISA extensions through hwprobe.

Signed-off-by: Pincheng Wang <pincheng.plct@isrc.iscas.ac.cn>
Reviewed-by: Nutty Liu <nutty.liu@hotmail.com>
Link: https://patch.msgid.link/20250826162939.1494021-4-pincheng.plct@isrc.iscas.ac.cn
[pjw@kernel.org: fixed whitespace; updated to apply]
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 arch/riscv/include/uapi/asm/hwprobe.h | 3 +++
 arch/riscv/kernel/sys_hwprobe.c       | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 1edea2331b8b..cd3c126730c3 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -84,6 +84,9 @@ struct riscv_hwprobe {
 #define		RISCV_HWPROBE_EXT_ZABHA		(1ULL << 58)
 #define		RISCV_HWPROBE_EXT_ZALASR	(1ULL << 59)
 #define		RISCV_HWPROBE_EXT_ZICBOP	(1ULL << 60)
+#define		RISCV_HWPROBE_EXT_ZILSD		(1ULL << 61)
+#define		RISCV_HWPROBE_EXT_ZCLSD		(1ULL << 62)
+
 #define RISCV_HWPROBE_KEY_CPUPERF_0	5
 #define		RISCV_HWPROBE_MISALIGNED_UNKNOWN	(0 << 0)
 #define		RISCV_HWPROBE_MISALIGNED_EMULATED	(1 << 0)
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 0f701ace3bb9..e6787ba7f2fc 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -121,6 +121,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
 		EXT_KEY(ZBS);
 		EXT_KEY(ZCA);
 		EXT_KEY(ZCB);
+		EXT_KEY(ZCLSD);
 		EXT_KEY(ZCMOP);
 		EXT_KEY(ZICBOM);
 		EXT_KEY(ZICBOP);
@@ -130,6 +131,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
 		EXT_KEY(ZIHINTNTL);
 		EXT_KEY(ZIHINTPAUSE);
 		EXT_KEY(ZIHPM);
+		EXT_KEY(ZILSD);
 		EXT_KEY(ZIMOP);
 		EXT_KEY(ZKND);
 		EXT_KEY(ZKNE);
-- 
cgit v1.2.3


From f02dd254727665cc292669194b9171bb70413346 Mon Sep 17 00:00:00 2001
From: Zongmin Zhou <zhouzongmin@kylinos.cn>
Date: Thu, 20 Nov 2025 17:58:31 +0800
Subject: riscv/atomic.h: use RISCV_FULL_BARRIER in _arch_atomic* function.

Replace the open-coded "fence rw, rw" instructions with the pre-defined
macro RISCV_FULL_BARRIER to simplify the code.

Signed-off-by: Zongmin Zhou <zhouzongmin@kylinos.cn>
Link: https://patch.msgid.link/20251120095831.64211-1-min_halo@163.com
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 arch/riscv/include/asm/atomic.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 5b96c2f61adb..3f33dc54f94b 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -203,7 +203,7 @@ ATOMIC_OPS(xor, xor, i)
 		"	add            %[rc], %[p], %[a]\n"		\
 		"	sc." sfx ".rl  %[rc], %[rc], %[c]\n"		\
 		"	bnez           %[rc], 0b\n"			\
-		"	fence          rw, rw\n"			\
+		RISCV_FULL_BARRIER					\
 		"1:\n"							\
 		: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter)	\
 		: [a]"r" (_a), [u]"r" (_u)				\
@@ -242,7 +242,7 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a,
 		"	addi            %[rc], %[p], 1\n"		\
 		"	sc." sfx ".rl   %[rc], %[rc], %[c]\n"		\
 		"	bnez            %[rc], 0b\n"			\
-		"	fence           rw, rw\n"			\
+		RISCV_FULL_BARRIER					\
 		"1:\n"							\
 		: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter)	\
 		:							\
@@ -268,7 +268,7 @@ static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
 		"	addi            %[rc], %[p], -1\n"		\
 		"	sc." sfx ".rl   %[rc], %[rc], %[c]\n"		\
 		"	bnez            %[rc], 0b\n"			\
-		"	fence           rw, rw\n"			\
+		RISCV_FULL_BARRIER					\
 		"1:\n"							\
 		: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter)	\
 		:							\
@@ -294,7 +294,7 @@ static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v)
 		"	bltz           %[rc], 1f\n"			\
 		"	sc." sfx ".rl  %[rc], %[rc], %[c]\n"		\
 		"	bnez           %[rc], 0b\n"			\
-		"	fence          rw, rw\n"			\
+		RISCV_FULL_BARRIER					\
 		"1:\n"							\
 		: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter)	\
 		:							\
-- 
cgit v1.2.3


From 5efaf92da4365cb8d1ae6dd7a2d1245c69e09ff5 Mon Sep 17 00:00:00 2001
From: Himanshu Chauhan <hchauhan@ventanamicro.com>
Date: Thu, 10 Jul 2025 18:22:30 +0530
Subject: riscv: Add SBI debug trigger extension and function ids

Debug trigger extension is an SBI extension to support native debugging
in S-mode and VS-mode. This patch adds the extension and the function
IDs defined by the extension.

Signed-off-by: Himanshu Chauhan <hchauhan@ventanamicro.com>
Link: https://patch.msgid.link/20250710125231.653967-2-hchauhan@ventanamicro.com
[pjw@kernel.org: updated to apply]
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 arch/riscv/include/asm/sbi.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index ccc77a89b1e2..5725e0ca4dda 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -37,6 +37,7 @@ enum sbi_ext_id {
 	SBI_EXT_NACL = 0x4E41434C,
 	SBI_EXT_FWFT = 0x46574654,
 	SBI_EXT_MPXY = 0x4D505859,
+	SBI_EXT_DBTR = 0x44425452,
 
 	/* Experimentals extensions must lie within this range */
 	SBI_EXT_EXPERIMENTAL_START = 0x08000000,
@@ -505,6 +506,34 @@ enum sbi_mpxy_rpmi_attribute_id {
 #define SBI_MPXY_CHAN_CAP_SEND_WITHOUT_RESP	BIT(4)
 #define SBI_MPXY_CHAN_CAP_GET_NOTIFICATIONS	BIT(5)
 
+/* SBI debug triggers function IDs */
+enum sbi_ext_dbtr_fid {
+	SBI_EXT_DBTR_NUM_TRIGGERS = 0,
+	SBI_EXT_DBTR_SETUP_SHMEM,
+	SBI_EXT_DBTR_TRIG_READ,
+	SBI_EXT_DBTR_TRIG_INSTALL,
+	SBI_EXT_DBTR_TRIG_UPDATE,
+	SBI_EXT_DBTR_TRIG_UNINSTALL,
+	SBI_EXT_DBTR_TRIG_ENABLE,
+	SBI_EXT_DBTR_TRIG_DISABLE,
+};
+
+struct sbi_dbtr_data_msg {
+	unsigned long tstate;
+	unsigned long tdata1;
+	unsigned long tdata2;
+	unsigned long tdata3;
+};
+
+struct sbi_dbtr_id_msg {
+	unsigned long idx;
+};
+
+union sbi_dbtr_shmem_entry {
+	struct sbi_dbtr_data_msg data;
+	struct sbi_dbtr_id_msg id;
+};
+
 /* SBI spec version fields */
 #define SBI_SPEC_VERSION_DEFAULT	0x1
 #define SBI_SPEC_VERSION_MAJOR_SHIFT	24
-- 
cgit v1.2.3


From 987697749def9c5e10d9a2d992f012db61ae1967 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Wed, 19 Nov 2025 18:51:22 +0100
Subject: arm64: dts: mediatek: mt7986: add dtbs with applied overlays for
 bpi-r3

Build devicetree binaries for testing overlays and providing users
full dtb without using overlays.

Suggested-by: Rob Herring <robh+dt@kernel.org>
Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Fixes: a58c36806741 ("arm64: dts: mediatek: mt7988a-bpi-r4pro: Add mmc overlays")
Fixes: dec929e61a42 ("arm64: dts: mediatek: mt7988a-bpi-r4-pro: Add PCIe overlays")
Fixes: 714a80ced07a ("arm64: dts: mediatek: mt7988a-bpi-r4: Add dt overlays for sd + emmc")
Fixes: 312189ebb802 ("arm64: dts: mt7986: add overlay for SATA power socket on BPI-R3")
Fixes: 8e01fb15b815 ("arm64: dts: mt7986: add Bananapi R3")
Acked-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://patch.msgid.link/20251119175124.48947-2-linux@fw-web.de
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 arch/arm64/boot/dts/mediatek/Makefile | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
index c5fd6191a925..77d76730d61b 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -19,6 +19,27 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nand.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nor.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sata.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd.dtbo
+mt7986a-bananapi-bpi-r3-emmc-nand-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-emmc.dtbo \
+	mt7986a-bananapi-bpi-r3-nand.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-emmc-nand.dtb
+mt7986a-bananapi-bpi-r3-emmc-nor-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-emmc.dtbo \
+	mt7986a-bananapi-bpi-r3-nor.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-emmc-nor.dtb
+mt7986a-bananapi-bpi-r3-sd-nand-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-sd.dtbo \
+	mt7986a-bananapi-bpi-r3-nand.dtbo \
+	mt7986a-bananapi-bpi-r3-sata.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd-nand.dtb
+mt7986a-bananapi-bpi-r3-sd-nor-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-sd.dtbo \
+	mt7986a-bananapi-bpi-r3-nor.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd-nor.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-rfb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986b-rfb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4.dtb
-- 
cgit v1.2.3


From 0773bc6ab7ec0b707632c991fe29edf28f03a641 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Wed, 19 Nov 2025 18:51:23 +0100
Subject: arm64: dts: mediatek: mt7988: add dtbs with applied overlays for
 bpi-r4 (pro)

Build devicetree binaries for testing overlays and providing users
full dtb without using overlays for Bananapi R4 (pro) variants.

Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Link: https://patch.msgid.link/20251119175124.48947-3-linux@fw-web.de
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 arch/arm64/boot/dts/mediatek/Makefile | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
index 77d76730d61b..cac8f4c6d76f 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -52,6 +52,38 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-cn18.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-emmc.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-sd.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-sd.dtbo
+mt7988a-bananapi-bpi-r4-emmc-dtbs := \
+	mt7988a-bananapi-bpi-r4.dtb \
+	mt7988a-bananapi-bpi-r4-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-emmc.dtb
+mt7988a-bananapi-bpi-r4-sd-dtbs := \
+	mt7988a-bananapi-bpi-r4.dtb \
+	mt7988a-bananapi-bpi-r4-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-sd.dtb
+mt7988a-bananapi-bpi-r4-2g5-emmc-dtbs := \
+	mt7988a-bananapi-bpi-r4-2g5.dtb \
+	mt7988a-bananapi-bpi-r4-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-2g5-emmc.dtb
+mt7988a-bananapi-bpi-r4-2g5-sd-dtbs := \
+	mt7988a-bananapi-bpi-r4-2g5.dtb \
+	mt7988a-bananapi-bpi-r4-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-2g5-sd.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-emmc-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x.dtb \
+	mt7988a-bananapi-bpi-r4-pro-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-emmc.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x.dtb \
+	mt7988a-bananapi-bpi-r4-pro-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-cn15-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb \
+	mt7988a-bananapi-bpi-r4-pro-cn15.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd-cn15.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-cn18-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb \
+	mt7988a-bananapi-bpi-r4-pro-cn18.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd-cn18.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8167-pumpkin.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm-hana.dtb
-- 
cgit v1.2.3


From ce7b1d58609abc2941a1f38094147f439fb74233 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Fri, 5 Dec 2025 22:59:38 +0100
Subject: arm64: dts: mediatek: Apply mt8395-radxa DT overlay at build time

It's a requirement that DT overlays be applied at build time in order to
validate them as overlays are not validated on their own.

Add missing target for mt8395-radxa hd panel overlay.

Fixes: 4c8ff61199a7 ("arm64: dts: mediatek: mt8395-radxa-nio-12l: Add Radxa 8 HD panel")
Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Acked-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://patch.msgid.link/20251205215940.19287-1-linux@fw-web.de
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 arch/arm64/boot/dts/mediatek/Makefile | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
index cac8f4c6d76f..3f76d9ce9879 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -166,6 +166,8 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt8390-grinn-genio-700-sbc.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-kontron-3-5-sbc-i1200.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l-8-hd-panel.dtbo
+mt8395-radxa-nio-12l-8-hd-panel-dtbs := mt8395-radxa-nio-12l.dtb mt8395-radxa-nio-12l-8-hd-panel.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l-8-hd-panel.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8516-pumpkin.dtb
 
 # Device tree overlays support
-- 
cgit v1.2.3


From 91ff28ae6d050e0ca01ac13eb8ba31d744cf672f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 Dec 2025 11:20:07 +0000
Subject: x86/irqflags: Use ASM_OUTPUT_RM in native_save_fl()

clang is generating very inefficient code for native_save_fl() which is
used for local_irq_save() in critical spots.

Allowing the "pop %0" to use memory:

 1) forces the compiler to add annoying stack canaries when
    CONFIG_STACKPROTECTOR_STRONG=y in many places.

 2) Almost always is followed by an immediate "move memory,register"

One good example is _raw_spin_lock_irqsave, with 8 extra instructions

  ffffffff82067a30 <_raw_spin_lock_irqsave>:
  ffffffff82067a30:		...
  ffffffff82067a39:		53						push   %rbx

  // Three instructions to adjust the stack, read the per-cpu canary
  // and copy it to 8(%rsp)
  ffffffff82067a3a:		48 83 ec 10 			sub    $0x10,%rsp
  ffffffff82067a3e:		65 48 8b 05 da 15 45 02 mov    %gs:0x24515da(%rip),%rax 	   # <__stack_chk_guard>
  ffffffff82067a46:		48 89 44 24 08			mov    %rax,0x8(%rsp)

  ffffffff82067a4b:		9c						pushf

  // instead of pop %rbx, compiler uses 2 instructions.
  ffffffff82067a4c:		8f 04 24				pop    (%rsp)
  ffffffff82067a4f:		48 8b 1c 24 			mov    (%rsp),%rbx

  ffffffff82067a53:		fa						cli
  ffffffff82067a54:		b9 01 00 00 00			mov    $0x1,%ecx
  ffffffff82067a59:		31 c0					xor    %eax,%eax
  ffffffff82067a5b:		f0 0f b1 0f 			lock cmpxchg %ecx,(%rdi)
  ffffffff82067a5f:		75 1d					jne    ffffffff82067a7e <_raw_spin_lock_irqsave+0x4e>

  // three instructions to check the stack canary
  ffffffff82067a61:		65 48 8b 05 b7 15 45 02 mov    %gs:0x24515b7(%rip),%rax 	   # <__stack_chk_guard>
  ffffffff82067a69:		48 3b 44 24 08			cmp    0x8(%rsp),%rax
  ffffffff82067a6e:		75 17					jne    ffffffff82067a87

  ...

  // One extra instruction to adjust the stack.
  ffffffff82067a73:		48 83 c4 10 			add    $0x10,%rsp
  ...

  // One more instruction in case the stack was mangled.
  ffffffff82067a87:		e8 a4 35 ff ff			call   ffffffff8205b030 <__stack_chk_fail>

This patch changes nothing for gcc, but for clang saves ~20000 bytes of text
even though more functions are inlined.

  $ size vmlinux.gcc.before vmlinux.gcc.after vmlinux.clang.before vmlinux.clang.after
     text	   data		bss		dec		hex	filename
  45565821	25005462	4704800	75276083	47c9f33	vmlinux.gcc.before
  45565821	25005462	4704800	75276083	47c9f33	vmlinux.gcc.after
  45121072	24638617	5533040	75292729	47ce039	vmlinux.clang.before
  45093887	24638633	5536808	75269328	47c84d0	vmlinux.clang.after

  $ scripts/bloat-o-meter -t vmlinux.clang.before vmlinux.clang.after
  add/remove: 1/2 grow/shrink: 21/533 up/down: 2250/-22112 (-19862)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/irqflags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index b30e5474c18e..a1193e9d65f2 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -25,7 +25,7 @@ extern __always_inline unsigned long native_save_fl(void)
 	 */
 	asm volatile("# __raw_save_flags\n\t"
 		     "pushf ; pop %0"
-		     : "=rm" (flags)
+		     : ASM_OUTPUT_RM (flags)
 		     : /* no input */
 		     : "memory");
 
-- 
cgit v1.2.3


From c2296a1e42418556efbeb5636c4fa6aa6106713a Mon Sep 17 00:00:00 2001
From: "Nysal Jan K.A." <nysal@linux.ibm.com>
Date: Tue, 28 Oct 2025 16:25:12 +0530
Subject: powerpc/kexec: Enable SMT before waking offline CPUs

If SMT is disabled or a partial SMT state is enabled, when a new kernel
image is loaded for kexec, on reboot the following warning is observed:

kexec: Waking offline cpu 228.
WARNING: CPU: 0 PID: 9062 at arch/powerpc/kexec/core_64.c:223 kexec_prepare_cpus+0x1b0/0x1bc
[snip]
 NIP kexec_prepare_cpus+0x1b0/0x1bc
 LR  kexec_prepare_cpus+0x1a0/0x1bc
 Call Trace:
  kexec_prepare_cpus+0x1a0/0x1bc (unreliable)
  default_machine_kexec+0x160/0x19c
  machine_kexec+0x80/0x88
  kernel_kexec+0xd0/0x118
  __do_sys_reboot+0x210/0x2c4
  system_call_exception+0x124/0x320
  system_call_vectored_common+0x15c/0x2ec

This occurs as add_cpu() fails due to cpu_bootable() returning false for
CPUs that fail the cpu_smt_thread_allowed() check or non primary
threads if SMT is disabled.

Fix the issue by enabling SMT and resetting the number of SMT threads to
the number of threads per core, before attempting to wake up all present
CPUs.

Fixes: 38253464bc82 ("cpu/SMT: Create topology_smt_thread_allowed()")
Reported-by: Sachin P Bappalige <sachinpb@linux.ibm.com>
Cc: stable@vger.kernel.org # v6.6+
Reviewed-by: Srikar Dronamraju <srikar@linux.ibm.com>
Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
Tested-by: Samir M <samir@linux.ibm.com>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20251028105516.26258-1-nysal@linux.ibm.com
---
 arch/powerpc/kexec/core_64.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 222aa326dace..825ab8a88f18 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -202,6 +202,23 @@ static void kexec_prepare_cpus_wait(int wait_state)
 	mb();
 }
 
+
+/*
+ * The add_cpu() call in wake_offline_cpus() can fail as cpu_bootable()
+ * returns false for CPUs that fail the cpu_smt_thread_allowed() check
+ * or non primary threads if SMT is disabled. Re-enable SMT and set the
+ * number of SMT threads to threads per core.
+ */
+static void kexec_smt_reenable(void)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
+	lock_device_hotplug();
+	cpu_smt_num_threads = threads_per_core;
+	cpu_smt_control = CPU_SMT_ENABLED;
+	unlock_device_hotplug();
+#endif
+}
+
 /*
  * We need to make sure each present CPU is online.  The next kernel will scan
  * the device tree and assume primary threads are online and query secondary
@@ -216,6 +233,8 @@ static void wake_offline_cpus(void)
 {
 	int cpu = 0;
 
+	kexec_smt_reenable();
+
 	for_each_present_cpu(cpu) {
 		if (!cpu_online(cpu)) {
 			printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
-- 
cgit v1.2.3


From f1164534ad62f0cc247d99650b07bd59ad2a49fd Mon Sep 17 00:00:00 2001
From: Jan Stancek <jstancek@redhat.com>
Date: Tue, 23 Sep 2025 17:32:16 +0200
Subject: powerpc/tools: drop `-o pipefail` in gcc check scripts

Fixes: 0f71dcfb4aef ("powerpc/ftrace: Add support for -fpatchable-function-entry")
Fixes: b71c9ffb1405 ("powerpc: Add arch/powerpc/tools directory")
Reported-by: Joe Lawrence <joe.lawrence@redhat.com>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Signed-off-by: Jan Stancek <jstancek@redhat.com>
Fixes: 8c50b72a3b4f ("powerpc/ftrace: Add Kconfig & Make glue for mprofile-kernel")
Fixes: abba759796f9 ("powerpc/kbuild: move -mprofile-kernel check to Kconfig")
Tested-by: Justin M. Forbes <jforbes@fedoraproject.org>
Reviewed-by: Naveen N Rao (AMD) <naveen@kernel.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/cc6cdd116c3ad9d990df21f13c6d8e8a83815bbd.1758641374.git.jstancek@redhat.com
---
 arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh | 1 -
 arch/powerpc/tools/gcc-check-mprofile-kernel.sh           | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh
index 06706903503b..baed467a016b 100755
--- a/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh
+++ b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
 set -e
-set -o pipefail
 
 # To debug, uncomment the following line
 # set -x
diff --git a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
index 73e331e7660e..6193b0ed0c77 100755
--- a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
+++ b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
 set -e
-set -o pipefail
 
 # To debug, uncomment the following line
 # set -x
-- 
cgit v1.2.3


From b94b73567561642323617155bf4ee24ef0d258fe Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@linux-m68k.org>
Date: Mon, 10 Nov 2025 10:30:22 +1100
Subject: powerpc: Add reloc_offset() to font bitmap pointer used for
 bootx_printf()

Since Linux v6.7, booting using BootX on an Old World PowerMac produces
an early crash. Stan Johnson writes, "the symptoms are that the screen
goes blank and the backlight stays on, and the system freezes (Linux
doesn't boot)."

Further testing revealed that the failure can be avoided by disabling
CONFIG_BOOTX_TEXT. Bisection revealed that the regression was caused by
a change to the font bitmap pointer that's used when btext_init() begins
painting characters on the display, early in the boot process.

Christophe Leroy explains, "before kernel text is relocated to its final
location ... data is addressed with an offset which is added to the
Global Offset Table (GOT) entries at the start of bootx_init()
by function reloc_got2(). But the pointers that are located inside a
structure are not referenced in the GOT and are therefore not updated by
reloc_got2(). It is therefore needed to apply the offset manually by using
PTRRELOC() macro."

Cc: stable@vger.kernel.org
Link: https://lists.debian.org/debian-powerpc/2025/10/msg00111.html
Link: https://lore.kernel.org/linuxppc-dev/d81ddca8-c5ee-d583-d579-02b19ed95301@yahoo.com/
Reported-by: Cedar Maxwell <cedarmaxwell@mac.com>
Closes: https://lists.debian.org/debian-powerpc/2025/09/msg00031.html
Bisected-by: Stan Johnson <userm57@yahoo.com>
Tested-by: Stan Johnson <userm57@yahoo.com>
Fixes: 0ebc7feae79a ("powerpc: Use shared font data")
Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Finn Thain <fthain@linux-m68k.org>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/22b3b247425a052b079ab84da926706b3702c2c7.1762731022.git.fthain@linux-m68k.org
---
 arch/powerpc/kernel/btext.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 7f63f1cdc6c3..ca00c4824e31 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -20,6 +20,7 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/udbg.h>
+#include <asm/setup.h>
 
 #define NO_SCROLL
 
@@ -463,7 +464,7 @@ static noinline void draw_byte(unsigned char c, long locX, long locY)
 {
 	unsigned char *base	= calc_base(locX << 3, locY << 4);
 	unsigned int font_index = c * 16;
-	const unsigned char *font	= font_sun_8x16.data + font_index;
+	const unsigned char *font = PTRRELOC(font_sun_8x16.data) + font_index;
 	int rb			= dispDeviceRowBytes;
 
 	rmci_maybe_on();
-- 
cgit v1.2.3


From fbe409d138b1d8a8b91cdad19cf95495e8ebe1ee Mon Sep 17 00:00:00 2001
From: Aboorva Devarajan <aboorvad@linux.ibm.com>
Date: Mon, 8 Sep 2025 14:21:23 +0530
Subject: powerpc/powernv: Enable cpuidle state detection for POWER11

Extend cpuidle state detection to POWER11 by updating the PVR check.
This ensures POWER11 correctly recognizes supported stop states,
similar to POWER9 and POWER10.

Without Patch: (Power11 - PowerNV systems)

CPUidle driver: powernv_idle
CPUidle governor: menu
analyzing CPU 927:

Number of idle states: 1
Available idle states: snooze
snooze:
Flags/Description: snooze
Latency: 0
Usage: 251631
Duration: 207497715900

--
With Patch: (Power11 - PowerNV systems)

CPUidle driver: powernv_idle
CPUidle governor: menu
analyzing CPU 959:

Number of idle states: 4
Available idle states: snooze stop0_lite stop0 stop3
snooze:
Flags/Description: snooze
Latency: 0
Usage: 2
Duration: 33
stop0_lite:
Flags/Description: stop0_lite
Latency: 1
Usage: 1
Duration: 52
stop0:
Flags/Description: stop0
Latency: 10
Usage: 13
Duration: 1920
stop3:
Flags/Description: stop3
Latency: 45
Usage: 381
Duration: 21638478

Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Reviewed-by: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20250908085123.216780-1-aboorvad@linux.ibm.com
---
 arch/powerpc/platforms/powernv/idle.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index d98b933e4984..e4f4e907f6e3 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -1171,8 +1171,9 @@ static void __init pnv_arch300_idle_init(void)
 	u64 max_residency_ns = 0;
 	int i;
 
-	/* stop is not really architected, we only have p9,p10 drivers */
-	if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
+	/* stop is not really architected, we only have p9,p10 and p11 drivers */
+	if (!pvr_version_is(PVR_POWER9) && !pvr_version_is(PVR_POWER10) &&
+		!pvr_version_is(PVR_POWER11))
 		return;
 
 	/*
@@ -1189,8 +1190,8 @@ static void __init pnv_arch300_idle_init(void)
 		struct pnv_idle_states_t *state = &pnv_idle_states[i];
 		u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
 
-		/* No deep loss driver implemented for POWER10 yet */
-		if (pvr_version_is(PVR_POWER10) &&
+		/* No deep loss driver implemented for POWER10 and POWER11 yet */
+		if ((pvr_version_is(PVR_POWER10) || pvr_version_is(PVR_POWER11)) &&
 				state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
 			continue;
 
-- 
cgit v1.2.3


From 608328ba5b0619cbc28b409296b5e3840bcb97b6 Mon Sep 17 00:00:00 2001
From: "Christophe Leroy (CS GROUP)" <chleroy@kernel.org>
Date: Fri, 19 Dec 2025 13:23:52 +0100
Subject: powerpc/32: Restore disabling of interrupts at interrupt/syscall exit

Commit 2997876c4a1a ("powerpc/32: Restore clearing of MSR[RI] at
interrupt/syscall exit") delayed clearing of MSR[RI], but missed that
both MSR[RI] and MSR[EE] are cleared at the same time, so the commit
also delayed the disabling of interrupts, leading to unexpected
behaviour.

To fix that, mostly revert the blamed commit and restore the clearing
of MSR[RI] in interrupt_exit_kernel_prepare() instead. For 8xx it
implies adding a synchronising instruction after the mtspr in order to
make sure no instruction counter interrupt (used for perf events) will
fire just after clearing MSR[RI].

Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Closes: https://lore.kernel.org/all/4d0bd05d-6158-1323-3509-744d3fbe8fc7@xenosoft.de/
Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/all/6b05eb1c-fdef-44e0-91a7-8286825e68f1@roeck-us.net/
Fixes: 2997876c4a1a ("powerpc/32: Restore clearing of MSR[RI] at interrupt/syscall exit")
Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/585ea521b2be99d293b539bbfae148366cfb3687.1766146895.git.chleroy@kernel.org
---
 arch/powerpc/include/asm/hw_irq.h |  2 +-
 arch/powerpc/include/asm/reg.h    |  1 +
 arch/powerpc/kernel/entry_32.S    | 15 ---------------
 arch/powerpc/kernel/interrupt.c   |  5 ++++-
 4 files changed, 6 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 1078ba88efaf..9cd945f2acaf 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -90,7 +90,7 @@ static inline void __hard_EE_RI_disable(void)
 	if (IS_ENABLED(CONFIG_BOOKE))
 		wrtee(0);
 	else if (IS_ENABLED(CONFIG_PPC_8xx))
-		wrtspr(SPRN_NRI);
+		wrtspr_sync(SPRN_NRI);
 	else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
 		__mtmsrd(0, 1);
 	else
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 3fe186635432..3449dd2b577d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1400,6 +1400,7 @@ static inline void mtmsr_isync(unsigned long val)
 				     : "r" ((unsigned long)(v)) \
 				     : "memory")
 #define wrtspr(rn)	asm volatile("mtspr " __stringify(rn) ",2" : : : "memory")
+#define wrtspr_sync(rn)	asm volatile("mtspr " __stringify(rn) ",2; sync" : : : "memory")
 
 static inline void wrtee(unsigned long val)
 {
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 16f8ee6cb2cd..d8426251b1cd 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -101,17 +101,6 @@ SYM_FUNC_END(__kuep_unlock)
 .endm
 #endif
 
-.macro	clr_ri trash
-#ifndef CONFIG_BOOKE
-#ifdef CONFIG_PPC_8xx
-	mtspr   SPRN_NRI, \trash
-#else
-	li	\trash, MSR_KERNEL & ~MSR_RI
-	mtmsr	\trash
-#endif
-#endif
-.endm
-
 	.globl	transfer_to_syscall
 transfer_to_syscall:
 	stw	r3, ORIG_GPR3(r1)
@@ -160,7 +149,6 @@ ret_from_syscall:
 	cmpwi	r3,0
 	REST_GPR(3, r1)
 syscall_exit_finish:
-	clr_ri	r4
 	mtspr	SPRN_SRR0,r7
 	mtspr	SPRN_SRR1,r8
 
@@ -237,7 +225,6 @@ fast_exception_return:
 	/* Clear the exception marker on the stack to avoid confusing stacktrace */
 	li	r10, 0
 	stw	r10, 8(r11)
-	clr_ri	r10
 	mtspr	SPRN_SRR1,r9
 	mtspr	SPRN_SRR0,r12
 	REST_GPR(9, r11)
@@ -270,7 +257,6 @@ interrupt_return:
 .Lfast_user_interrupt_return:
 	lwz	r11,_NIP(r1)
 	lwz	r12,_MSR(r1)
-	clr_ri	r4
 	mtspr	SPRN_SRR0,r11
 	mtspr	SPRN_SRR1,r12
 
@@ -313,7 +299,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
 	cmpwi	cr1,r3,0
 	lwz	r11,_NIP(r1)
 	lwz	r12,_MSR(r1)
-	clr_ri	r4
 	mtspr	SPRN_SRR0,r11
 	mtspr	SPRN_SRR1,r12
 
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index aea6f7e8e9c6..e63bfde13e03 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -38,7 +38,7 @@ static inline bool exit_must_hard_disable(void)
 #else
 static inline bool exit_must_hard_disable(void)
 {
-	return false;
+	return true;
 }
 #endif
 
@@ -443,6 +443,9 @@ again:
 
 		if (unlikely(stack_store))
 			__hard_EE_RI_disable();
+#else
+	} else {
+		__hard_EE_RI_disable();
 #endif /* CONFIG_PPC64 */
 	}
 
-- 
cgit v1.2.3