summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/Kbuild1
-rw-r--r--arch/arc/include/asm/Kbuild1
-rw-r--r--arch/arm/boot/dts/renesas/r7s72100-genmai.dts3
-rw-r--r--arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts2
-rw-r--r--arch/arm/boot/dts/renesas/r7s72100.dtsi2
-rw-r--r--arch/arm/boot/dts/renesas/r8a7778.dtsi2
-rw-r--r--arch/arm/boot/dts/renesas/r8a7779.dtsi2
-rw-r--r--arch/arm/boot/dts/renesas/r8a7792.dtsi2
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/mach-versatile/integrator_cp.c13
-rw-r--r--arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso5
-rw-r--r--arch/arm64/boot/dts/renesas/r8a78000.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r9a09g056.dtsi1
-rw-r--r--arch/arm64/boot/dts/renesas/r9a09g057.dtsi2
-rw-r--r--arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi5
-rw-r--r--arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi5
-rw-r--r--arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso5
-rw-r--r--arch/arm64/include/asm/insn.h2
-rw-r--r--arch/arm64/include/asm/kvm_nested.h1
-rw-r--r--arch/arm64/include/asm/page.h2
-rw-r--r--arch/arm64/include/asm/ring_buffer.h10
-rw-r--r--arch/arm64/include/asm/sysreg.h2
-rw-r--r--arch/arm64/include/asm/tlb.h3
-rw-r--r--arch/arm64/kernel/entry-common.c31
-rw-r--r--arch/arm64/kvm/arm.c25
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/clock.c3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c47
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c16
-rw-r--r--arch/arm64/kvm/hyp/nvhe/trace.c9
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c3
-rw-r--r--arch/arm64/kvm/mmu.c29
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c4
-rw-r--r--arch/arm64/mm/fault.c11
-rw-r--r--arch/csky/include/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/loongarch/include/asm/Kbuild1
-rw-r--r--arch/loongarch/include/asm/efi.h4
-rw-r--r--arch/loongarch/include/asm/paravirt.h6
-rw-r--r--arch/loongarch/include/asm/qspinlock.h5
-rw-r--r--arch/loongarch/kernel/kprobes.c14
-rw-r--r--arch/loongarch/kernel/relocate.c50
-rw-r--r--arch/loongarch/mm/init.c4
-rw-r--r--arch/m68k/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/mips/include/asm/Kbuild1
-rw-r--r--arch/nios2/include/asm/Kbuild1
-rw-r--r--arch/nios2/include/asm/linkage.h2
-rw-r--r--arch/openrisc/include/asm/Kbuild1
-rw-r--r--arch/parisc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/Kconfig.debug3
-rw-r--r--arch/powerpc/configs/g5_defconfig2
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig1
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/pmac_low_i2c.h4
-rw-r--r--arch/powerpc/kernel/time.c6
-rw-r--r--arch/powerpc/perf/hv-gpci.c24
-rw-r--r--arch/powerpc/platforms/44x/warp.c2
-rw-r--r--arch/powerpc/platforms/82xx/km82xx.c4
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c34
-rw-r--r--arch/riscv/Kconfig22
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi10
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts10
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts19
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110-common.dtsi27
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110.dtsi28
-rw-r--r--arch/riscv/errata/mips/errata.c2
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/kernel/compat_signal.c2
-rw-r--r--arch/riscv/kernel/copy-unaligned.S5
-rw-r--r--arch/riscv/kernel/cpufeature.c20
-rw-r--r--arch/riscv/kernel/ptrace.c4
-rw-r--r--arch/riscv/kernel/traps_misaligned.c2
-rw-r--r--arch/riscv/kernel/usercfi.c7
-rw-r--r--arch/riscv/kernel/vec-copy-unaligned.S5
-rw-r--r--arch/riscv/kvm/vcpu_insn.c9
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c12
-rw-r--r--arch/riscv/kvm/vcpu_sbi_sta.c2
-rw-r--r--arch/riscv/kvm/vcpu_sbi_v01.c2
-rw-r--r--arch/riscv/mm/init.c25
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/kernel/perf_pai.c31
-rw-r--r--arch/s390/kernel/topology.c10
-rw-r--r--arch/s390/kvm/dat.c1
-rw-r--r--arch/s390/kvm/dat.h3
-rw-r--r--arch/s390/kvm/gaccess.c1
-rw-r--r--arch/s390/kvm/gmap.c18
-rw-r--r--arch/s390/kvm/gmap.h61
-rw-r--r--arch/s390/kvm/interrupt.c3
-rw-r--r--arch/s390/kvm/pci.c10
-rw-r--r--arch/sh/include/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/x86/entry/Makefile2
-rw-r--r--arch/x86/entry/common.c61
-rw-r--r--arch/x86/entry/entry.S46
-rw-r--r--arch/x86/entry/entry_64_fred.S1
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/desc.h4
-rw-r--r--arch/x86/include/asm/desc_defs.h2
-rw-r--r--arch/x86/include/asm/entry-common.h2
-rw-r--r--arch/x86/include/asm/fred.h1
-rw-r--r--arch/x86/kernel/acpi/cppc.c6
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c1
-rw-r--r--arch/x86/kernel/cpu/mce/core.c33
-rw-r--r--arch/x86/kernel/idt.c15
-rw-r--r--arch/x86/kernel/nmi.c1
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S8
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c23
-rw-r--r--arch/x86/kvm/svm/avic.c12
-rw-r--r--arch/x86/kvm/svm/nested.c43
-rw-r--r--arch/x86/kvm/svm/svm.c15
-rw-r--r--arch/x86/kvm/trace.h2
-rw-r--r--arch/x86/kvm/vmx/capabilities.h1
-rw-r--r--arch/x86/kvm/vmx/vmenter.S46
-rw-r--r--arch/x86/kvm/vmx/vmx.c36
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/xen/mmu_pv.c8
-rw-r--r--arch/x86/xen/setup.c2
-rw-r--r--arch/xtensa/include/asm/Kbuild1
122 files changed, 777 insertions, 393 deletions
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 483965c5a4de..b154b4e3dfa8 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += agp.h
generic-y += asm-offsets.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += text-patching.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 4c69522e0328..483caacc6988 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -5,5 +5,6 @@ generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += parport.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += text-patching.h
diff --git a/arch/arm/boot/dts/renesas/r7s72100-genmai.dts b/arch/arm/boot/dts/renesas/r7s72100-genmai.dts
index 3c3756509714..da552a66615e 100644
--- a/arch/arm/boot/dts/renesas/r7s72100-genmai.dts
+++ b/arch/arm/boot/dts/renesas/r7s72100-genmai.dts
@@ -34,9 +34,6 @@
clocks = <&mstp9_clks R7S72100_CLK_SPIBSC0>;
power-domains = <&cpg_clocks>;
- #address-cells = <1>;
- #size-cells = <1>;
-
partitions {
compatible = "fixed-partitions";
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts b/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts
index 91178fb9e721..3306bc9b7bc3 100644
--- a/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts
+++ b/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts
@@ -36,8 +36,6 @@
power-domains = <&cpg_clocks>;
bank-width = <4>;
device-width = <1>;
- #address-cells = <1>;
- #size-cells = <1>;
partitions {
compatible = "fixed-partitions";
diff --git a/arch/arm/boot/dts/renesas/r7s72100.dtsi b/arch/arm/boot/dts/renesas/r7s72100.dtsi
index 245c26bb8e03..6ec57ffa72e8 100644
--- a/arch/arm/boot/dts/renesas/r7s72100.dtsi
+++ b/arch/arm/boot/dts/renesas/r7s72100.dtsi
@@ -37,7 +37,7 @@
clock-div = <3>;
};
- bsc: bus {
+ bsc: bus@0 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/renesas/r8a7778.dtsi b/arch/arm/boot/dts/renesas/r8a7778.dtsi
index 859dd29dfce3..7db456b19795 100644
--- a/arch/arm/boot/dts/renesas/r8a7778.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a7778.dtsi
@@ -40,7 +40,7 @@
spi2 = &hspi2;
};
- lbsc: bus {
+ lbsc: bus@0 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/renesas/r8a7779.dtsi b/arch/arm/boot/dts/renesas/r8a7779.dtsi
index e437c22f452d..9e8a7e190c89 100644
--- a/arch/arm/boot/dts/renesas/r8a7779.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a7779.dtsi
@@ -704,7 +704,7 @@
};
};
- lbsc: bus {
+ lbsc: bus@0 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/renesas/r8a7792.dtsi b/arch/arm/boot/dts/renesas/r8a7792.dtsi
index 9e0de69ac3a3..fbdbcff1cbed 100644
--- a/arch/arm/boot/dts/renesas/r8a7792.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a7792.dtsi
@@ -86,7 +86,7 @@
bootph-all;
};
- lbsc: bus {
+ lbsc: bus@0 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 03657ff8fbe3..decad5f2c826 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += early_ioremap.h
generic-y += extable.h
generic-y += flat.h
generic-y += parport.h
+generic-y += ring_buffer.h
generated-y += mach-types.h
generated-y += unistd-nr.h
diff --git a/arch/arm/mach-versatile/integrator_cp.c b/arch/arm/mach-versatile/integrator_cp.c
index 2ed4ded56b3f..03dfb5f720b7 100644
--- a/arch/arm/mach-versatile/integrator_cp.c
+++ b/arch/arm/mach-versatile/integrator_cp.c
@@ -86,14 +86,6 @@ static u64 notrace intcp_read_sched_clock(void)
return val;
}
-static void __init intcp_init_early(void)
-{
- cm_map = syscon_regmap_lookup_by_compatible("arm,core-module-integrator");
- if (IS_ERR(cm_map))
- return;
- sched_clock_register(intcp_read_sched_clock, 32, 24000000);
-}
-
static void __init intcp_init_irq_of(void)
{
cm_init();
@@ -119,6 +111,10 @@ static void __init intcp_init_of(void)
{
struct device_node *cpcon;
+ cm_map = syscon_regmap_lookup_by_compatible("arm,core-module-integrator");
+ if (!IS_ERR(cm_map))
+ sched_clock_register(intcp_read_sched_clock, 32, 24000000);
+
cpcon = of_find_matching_node(NULL, intcp_syscon_match);
if (!cpcon)
return;
@@ -138,7 +134,6 @@ static const char * intcp_dt_board_compat[] = {
DT_MACHINE_START(INTEGRATOR_CP_DT, "ARM Integrator/CP (Device Tree)")
.reserve = integrator_reserve,
.map_io = intcp_map_io,
- .init_early = intcp_init_early,
.init_irq = intcp_init_irq_of,
.init_machine = intcp_init_of,
.dt_compat = intcp_dt_board_compat,
diff --git a/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso b/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso
index 258f8668ca36..90767d74e21b 100644
--- a/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso
+++ b/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso
@@ -27,7 +27,12 @@
status = "okay";
ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
port@1 {
+ reg = <1>;
+
lvds1_out: endpoint {
remote-endpoint = <&panel_in>;
};
diff --git a/arch/arm64/boot/dts/renesas/r8a78000.dtsi b/arch/arm64/boot/dts/renesas/r8a78000.dtsi
index 3e1c98903cea..3ec1b53d2782 100644
--- a/arch/arm64/boot/dts/renesas/r8a78000.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a78000.dtsi
@@ -699,7 +699,7 @@
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc0700000 0 0x40>;
interrupts = <GIC_ESPI 10 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
+ clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
};
@@ -709,7 +709,7 @@
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc0704000 0 0x40>;
interrupts = <GIC_ESPI 11 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
+ clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
};
@@ -719,7 +719,7 @@
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc0708000 0 0x40>;
interrupts = <GIC_ESPI 12 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
+ clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
};
@@ -729,7 +729,7 @@
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc070c000 0 0x40>;
interrupts = <GIC_ESPI 13 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
+ clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/renesas/r9a09g056.dtsi b/arch/arm64/boot/dts/renesas/r9a09g056.dtsi
index 40525470194e..7ccddd6a4a9a 100644
--- a/arch/arm64/boot/dts/renesas/r9a09g056.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a09g056.dtsi
@@ -1327,6 +1327,7 @@
resets = <&cpg 0xaf>;
power-domains = <&cpg>;
#reset-cells = <0>;
+ #mux-state-cells = <1>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
index 9581af58024e..6f6fe5f36bef 100644
--- a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
@@ -1345,6 +1345,7 @@
resets = <&cpg 0xaf>;
power-domains = <&cpg>;
#reset-cells = <0>;
+ #mux-state-cells = <1>;
status = "disabled";
};
@@ -1355,6 +1356,7 @@
resets = <&cpg 0xaf>;
power-domains = <&cpg>;
#reset-cells = <0>;
+ #mux-state-cells = <1>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi b/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi
index 4d2b0655859a..3feffa4f16a9 100644
--- a/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi
+++ b/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi
@@ -46,7 +46,12 @@
status = "okay";
ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
port@0 {
+ reg = <0>;
+
csi2_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi b/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi
index 36707576030d..f5412578ee65 100644
--- a/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi
+++ b/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi
@@ -26,7 +26,12 @@
status = "okay";
ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
port@0 {
+ reg = <0>;
+
du_out_rgb: endpoint {
remote-endpoint = <&adv7513_in>;
};
diff --git a/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso b/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso
index c83a30adc6ad..7807c3f80409 100644
--- a/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso
+++ b/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso
@@ -27,7 +27,12 @@
status = "okay";
ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
port@1 {
+ reg = <1>;
+
lvds0_out: endpoint {
remote-endpoint = <&panel_in>;
};
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index f463a654a2bb..cc0702fa64a7 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -409,7 +409,7 @@ __AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000)
__AARCH64_INSN_FUNCS(cbnz, 0x7F000000, 0x35000000)
__AARCH64_INSN_FUNCS(tbz, 0x7F000000, 0x36000000)
__AARCH64_INSN_FUNCS(tbnz, 0x7F000000, 0x37000000)
-__AARCH64_INSN_FUNCS(bcond, 0xFF000010, 0x54000000)
+__AARCH64_INSN_FUNCS(bcond, 0xFF000000, 0x54000000)
__AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001)
__AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002)
__AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 091544e6af44..dc2957662ff2 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -23,6 +23,7 @@ static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
{
return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
+ ((tcr & TCR_EL2_DS) ? TCR_DS : 0) |
((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
tcr_el2_ps_to_tcr_el1_ips(tcr) |
(tcr & TCR_EL2_TG0_MASK) |
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index e25d0d18f6d7..58200de8a221 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -33,7 +33,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
unsigned long vaddr);
#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
-bool tag_clear_highpages(struct page *to, int numpages);
+bool tag_clear_highpages(struct page *to, int numpages, bool clear_pages);
#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
diff --git a/arch/arm64/include/asm/ring_buffer.h b/arch/arm64/include/asm/ring_buffer.h
new file mode 100644
index 000000000000..62316c406888
--- /dev/null
+++ b/arch/arm64/include/asm/ring_buffer.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_ARM64_RING_BUFFER_H
+#define _ASM_ARM64_RING_BUFFER_H
+
+#include <asm/cacheflush.h>
+
+/* Flush D-cache on persistent ring buffer */
+#define arch_ring_buffer_flush_range(start, end) dcache_clean_pop(start, end)
+
+#endif /* _ASM_ARM64_RING_BUFFER_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 736561480f36..7aa08d59d494 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -844,7 +844,7 @@
#define INIT_SCTLR_EL2_MMU_ON \
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
- SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
+ SCTLR_ELx_ITFSB | SCTLR_ELx_EIS | SCTLR_ELx_EOS | SCTLR_EL2_RES1)
#define INIT_SCTLR_EL2_MMU_OFF \
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 10869d7731b8..751bd57bc3ba 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -53,7 +53,8 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
static inline void tlb_flush(struct mmu_gather *tlb)
{
struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
- tlbf_t flags = tlb->freed_tables ? TLBF_NONE : TLBF_NOWALKCACHE;
+ tlbf_t flags = (tlb->freed_tables || tlb->unshared_tables) ?
+ TLBF_NONE : TLBF_NOWALKCACHE;
unsigned long stride = tlb_get_unmap_size(tlb);
int tlb_level = tlb_get_level(tlb);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index cb54335465f6..c7a23f7c2212 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -62,6 +62,13 @@ static void noinstr arm64_exit_to_kernel_mode(struct pt_regs *regs,
irqentry_exit_to_kernel_mode_after_preempt(regs, state);
}
+static __always_inline void arm64_syscall_enter_from_user_mode(struct pt_regs *regs)
+{
+ enter_from_user_mode(regs);
+ mte_disable_tco_entry(current);
+ sme_enter_from_user_mode();
+}
+
/*
* Handle IRQ/context state management when entering from user mode.
* Before this function is called it is not safe to call regular kernel code,
@@ -70,20 +77,30 @@ static void noinstr arm64_exit_to_kernel_mode(struct pt_regs *regs,
static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
{
enter_from_user_mode(regs);
+ rseq_note_user_irq_entry();
mte_disable_tco_entry(current);
sme_enter_from_user_mode();
}
+static __always_inline void arm64_syscall_exit_to_user_mode(struct pt_regs *regs)
+{
+ local_irq_disable();
+ syscall_exit_to_user_mode_prepare(regs);
+ local_daif_mask();
+ sme_exit_to_user_mode();
+ mte_check_tfsr_exit();
+ exit_to_user_mode();
+}
+
/*
* Handle IRQ/context state management when exiting to user mode.
* After this function returns it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
*/
-
static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
{
local_irq_disable();
- exit_to_user_mode_prepare_legacy(regs);
+ irqentry_exit_to_user_mode_prepare(regs);
local_daif_mask();
sme_exit_to_user_mode();
mte_check_tfsr_exit();
@@ -92,7 +109,7 @@ static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
{
- arm64_exit_to_user_mode(regs);
+ arm64_syscall_exit_to_user_mode(regs);
}
/*
@@ -716,12 +733,12 @@ static void noinstr el0_brk64(struct pt_regs *regs, unsigned long esr)
static void noinstr el0_svc(struct pt_regs *regs)
{
- arm64_enter_from_user_mode(regs);
+ arm64_syscall_enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
fpsimd_syscall_enter();
local_daif_restore(DAIF_PROCCTX);
do_el0_svc(regs);
- arm64_exit_to_user_mode(regs);
+ arm64_syscall_exit_to_user_mode(regs);
fpsimd_syscall_exit();
}
@@ -868,11 +885,11 @@ static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
static void noinstr el0_svc_compat(struct pt_regs *regs)
{
- arm64_enter_from_user_mode(regs);
+ arm64_syscall_enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
local_daif_restore(DAIF_PROCCTX);
do_el0_svc_compat(regs);
- arm64_exit_to_user_mode(regs);
+ arm64_syscall_exit_to_user_mode(regs);
}
static void noinstr el0_bkpt32(struct pt_regs *regs, unsigned long esr)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 8bb2c7422cc8..9453321ef8c6 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -4,6 +4,7 @@
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*/
+#include <linux/arm-smccc.h>
#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
@@ -554,8 +555,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_destroy_mpidr_data(vcpu->kvm);
err = kvm_vgic_vcpu_init(vcpu);
- if (err)
+ if (err) {
+ kvm_vgic_vcpu_destroy(vcpu);
return err;
+ }
err = kvm_share_hyp(vcpu, vcpu + 1);
if (err)
@@ -2638,6 +2641,22 @@ static int init_pkvm_host_sve_state(void)
return 0;
}
+static int pkvm_check_sme_dvmsync_fw_call(void)
+{
+ struct arm_smccc_res res;
+
+ if (!cpus_have_final_cap(ARM64_WORKAROUND_4193714))
+ return 0;
+
+ arm_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+ if (res.a0) {
+ kvm_err("pKVM requires firmware support for C1-Pro erratum 4193714\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
/*
* Finalizes the initialization of hyp mode, once everything else is initialized
* and the initialziation process cannot fail.
@@ -2838,6 +2857,10 @@ static int __init init_hyp_mode(void)
if (err)
goto out_err;
+ err = pkvm_check_sme_dvmsync_fw_call();
+ if (err)
+ goto out_err;
+
err = kvm_hyp_init_protection(hyp_va_bits);
if (err) {
kvm_err("Failed to init hyp memory protection\n");
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 98b2976837b1..bf0eb5e43427 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -245,7 +245,7 @@ static inline void __activate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
__activate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
}
-#define __deactivate_fgt(htcxt, vcpu, reg) \
+#define __deactivate_fgt(hctxt, vcpu, reg) \
do { \
write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
SYS_ ## reg); \
diff --git a/arch/arm64/kvm/hyp/nvhe/clock.c b/arch/arm64/kvm/hyp/nvhe/clock.c
index 32fc4313fe43..a7fc61976fd0 100644
--- a/arch/arm64/kvm/hyp/nvhe/clock.c
+++ b/arch/arm64/kvm/hyp/nvhe/clock.c
@@ -35,6 +35,9 @@ void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
struct clock_data *clock = &trace_clock_data;
u64 bank = clock->cur ^ 1;
+ if (!mult || shift >= 64)
+ return;
+
clock->data[bank].mult = mult;
clock->data[bank].shift = shift;
clock->data[bank].epoch_ns = epoch_ns;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 28a471d1927c..25f04629014e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -5,6 +5,7 @@
*/
#include <linux/kvm_host.h>
+
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
@@ -14,6 +15,7 @@
#include <hyp/fault.h>
+#include <nvhe/arm-smccc.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
@@ -29,6 +31,19 @@ static struct hyp_pool host_s2_pool;
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))
+static void pkvm_sme_dvmsync_fw_call(void)
+{
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714)) {
+ struct arm_smccc_res res;
+
+ /*
+ * Ignore the return value. Probing for the workaround
+ * availability took place in init_hyp_mode().
+ */
+ hyp_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+ }
+}
+
static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
hyp_spin_lock(&vm->lock);
@@ -574,8 +589,14 @@ static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
addr, size, &host_s2_pool,
KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
- if (!ret)
+ if (!ret) {
+ /*
+ * After stage2 maintenance has happened, but before the page
+ * owner has changed.
+ */
+ pkvm_sme_dvmsync_fw_call();
__host_update_page_state(addr, size, PKVM_NOPAGE);
+ }
return ret;
}
@@ -1369,6 +1390,22 @@ unlock:
return ret && ret != -EHWPOISON ? ret : 0;
}
+/*
+ * share/donate install at most one stage-2 leaf (PAGE_SIZE, or one
+ * KVM_PGTABLE_LAST_LEVEL - 1 block for share). kvm_mmu_cache_min_pages()
+ * bounds the worst-case allocation: exact for the PAGE_SIZE leaf,
+ * conservative by one for the block.
+ */
+static int __guest_check_pgtable_memcache(struct pkvm_hyp_vcpu *vcpu)
+{
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+
+ if (vcpu->vcpu.arch.pkvm_memcache.nr_pages < kvm_mmu_cache_min_pages(vm->pgt.mmu))
+ return -ENOMEM;
+
+ return 0;
+}
+
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
@@ -1388,6 +1425,10 @@ int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
if (ret)
goto unlock;
+ ret = __guest_check_pgtable_memcache(vcpu);
+ if (ret)
+ goto unlock;
+
meta = host_stage2_encode_gfn_meta(vm, gfn);
WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
PKVM_ID_GUEST, meta));
@@ -1453,6 +1494,10 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu
}
}
+ ret = __guest_check_pgtable_memcache(vcpu);
+ if (ret)
+ goto unlock;
+
for_each_hyp_page(page, phys, size) {
set_host_state(page, PKVM_PAGE_SHARED_OWNED);
page->host_share_guest_count++;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index e7496eb85628..eb1c10120f9f 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -752,16 +752,30 @@ static struct pkvm_hyp_vcpu selftest_vcpu = {
struct pkvm_hyp_vcpu *init_selftest_vm(void *virt)
{
struct hyp_page *p = hyp_virt_to_page(virt);
+ unsigned long min_pages, seeded = 0;
int i;
selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
+ /*
+ * Mirror pkvm_refill_memcache() for the share/donate pre-checks;
+ * the selftest invokes those functions directly and would
+ * otherwise see an empty memcache.
+ */
+ min_pages = kvm_mmu_cache_min_pages(&selftest_vm.kvm.arch.mmu);
+
for (i = 0; i < pkvm_selftest_pages(); i++) {
if (p[i].refcount)
continue;
p[i].refcount = 1;
- hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+ if (seeded < min_pages) {
+ push_hyp_memcache(&selftest_vcpu.vcpu.arch.pkvm_memcache,
+ hyp_page_to_virt(&p[i]), hyp_virt_to_phys);
+ seeded++;
+ } else {
+ hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+ }
}
selftest_vm.kvm.arch.pkvm.handle = __pkvm_reserve_vm();
diff --git a/arch/arm64/kvm/hyp/nvhe/trace.c b/arch/arm64/kvm/hyp/nvhe/trace.c
index a6ca27b18e15..e7e150ab265f 100644
--- a/arch/arm64/kvm/hyp/nvhe/trace.c
+++ b/arch/arm64/kvm/hyp/nvhe/trace.c
@@ -164,13 +164,16 @@ static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer,
return ret;
}
-static bool hyp_trace_desc_validate(struct hyp_trace_desc *desc, size_t desc_size)
+static bool hyp_trace_desc_is_valid(struct hyp_trace_desc *desc, size_t desc_size)
{
struct ring_buffer_desc *rb_desc;
unsigned int cpu;
size_t nr_bpages;
void *desc_end;
+ if (!is_protected_kvm_enabled())
+ return true;
+
/*
* Both desc_size and bpages_backing_size are untrusted host-provided
* values. We rely on __pkvm_host_donate_hyp() to enforce their validity.
@@ -212,8 +215,10 @@ int __tracing_load(unsigned long desc_hva, size_t desc_size)
if (ret)
return ret;
- if (!hyp_trace_desc_validate(desc, desc_size))
+ if (!hyp_trace_desc_is_valid(desc, desc_size)) {
+ ret = -EINVAL;
goto err_release_desc;
+ }
hyp_spin_lock(&trace_buffer.lock);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 9db3f11a4754..1e8995add14f 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -663,7 +663,8 @@ static void __noreturn __hyp_call_panic(u64 spsr, u64 elr, u64 par)
host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
- __deactivate_traps(vcpu);
+ if (vcpu)
+ __deactivate_traps(vcpu);
sysreg_restore_host_state_vhe(host_ctxt);
panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n",
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d089c107d9b7..4da9281312eb 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1576,21 +1576,24 @@ struct kvm_s2_fault_desc {
static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
{
bool write_fault, exec_fault;
+ bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt;
unsigned long mmu_seq;
struct page *page;
struct kvm *kvm = s2fd->vcpu->kvm;
- void *memcache;
+ void *memcache = NULL;
kvm_pfn_t pfn;
gfn_t gfn;
int ret;
- memcache = get_mmu_memcache(s2fd->vcpu);
- ret = topup_mmu_memcache(s2fd->vcpu, memcache);
- if (ret)
- return ret;
+ if (!perm_fault) {
+ memcache = get_mmu_memcache(s2fd->vcpu);
+ ret = topup_mmu_memcache(s2fd->vcpu, memcache);
+ if (ret)
+ return ret;
+ }
if (s2fd->nested)
gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT;
@@ -1631,9 +1634,19 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
goto out_unlock;
}
- ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
- __pfn_to_phys(pfn), prot,
- memcache, flags);
+ if (perm_fault) {
+ /*
+ * Drop the SW bits in favour of those stored in the
+ * PTE, which will be preserved.
+ */
+ prot &= ~KVM_NV_GUEST_MAP_SZ;
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, s2fd->fault_ipa,
+ prot, flags);
+ } else {
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
+ __pfn_to_phys(pfn), prot,
+ memcache, flags);
+ }
out_unlock:
kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W);
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 2ea9f1c7ebcd..1d7e5d560af4 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -2307,6 +2307,10 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
/* dte entry is valid */
offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
+ /* Mimic the MAPD behaviour and reject invalid EID bits. */
+ if (num_eventid_bits > VITS_TYPER_IDBITS)
+ return -EINVAL;
+
if (!vgic_its_check_id(its, baser, id, NULL))
return -EINVAL;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 0f3c5c7ca054..739800835920 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -1018,7 +1018,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
return vma_alloc_folio(flags, 0, vma, vaddr);
}
-bool tag_clear_highpages(struct page *page, int numpages)
+bool tag_clear_highpages(struct page *page, int numpages, bool clear_pages)
{
/*
* Check if MTE is supported and fall back to clear_highpage().
@@ -1026,13 +1026,16 @@ bool tag_clear_highpages(struct page *page, int numpages)
* post_alloc_hook() will invoke tag_clear_highpages().
*/
if (!system_supports_mte())
- return false;
+ return clear_pages;
/* Newly allocated pages, shouldn't have been tagged yet */
for (int i = 0; i < numpages; i++, page++) {
WARN_ON_ONCE(!try_page_mte_tagging(page));
- mte_zero_clear_page_tags(page_address(page));
+ if (clear_pages)
+ mte_zero_clear_page_tags(page_address(page));
+ else
+ mte_clear_page_tags(page_address(page));
set_page_mte_tagged(page);
}
- return true;
+ return false;
}
diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild
index 3a5c7f6e5aac..7dca0c6cdc84 100644
--- a/arch/csky/include/asm/Kbuild
+++ b/arch/csky/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += qrwlock.h
generic-y += qrwlock_types.h
generic-y += qspinlock.h
generic-y += parport.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += vmlinux.lds.h
generic-y += text-patching.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 1efa1e993d4b..0f887d4238ed 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += extable.h
generic-y += iomap.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += text-patching.h
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 9034b583a88a..7e92957baf6a 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -10,5 +10,6 @@ generic-y += qrwlock.h
generic-y += user.h
generic-y += ioctl.h
generic-y += mmzone.h
+generic-y += ring_buffer.h
generic-y += statfs.h
generic-y += text-patching.h
diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h
index eddc8e79b3fa..1ad764b18c3e 100644
--- a/arch/loongarch/include/asm/efi.h
+++ b/arch/loongarch/include/asm/efi.h
@@ -30,6 +30,8 @@ static inline unsigned long efi_get_kimg_min_align(void)
return SZ_2M;
}
-#define EFI_KIMG_PREFERRED_ADDRESS PHYSADDR(VMLINUX_LOAD_ADDRESS)
+unsigned long efi_get_kimg_kaslr_address(void);
+
+#define EFI_KIMG_PREFERRED_ADDRESS efi_get_kimg_kaslr_address()
#endif /* _ASM_LOONGARCH_EFI_H */
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
index 0111f0ad5f73..acae1c5e5f88 100644
--- a/arch/loongarch/include/asm/paravirt.h
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -4,6 +4,12 @@
#ifdef CONFIG_PARAVIRT
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_FALSE(virt_preempt_key);
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+DECLARE_PER_CPU(struct kvm_steal_time, steal_time);
+
int __init pv_ipi_init(void);
int __init pv_time_init(void);
int __init pv_spinlock_init(void);
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
index 0ee15b3b3937..fbfc6be82f26 100644
--- a/arch/loongarch/include/asm/qspinlock.h
+++ b/arch/loongarch/include/asm/qspinlock.h
@@ -3,12 +3,9 @@
#define _ASM_LOONGARCH_QSPINLOCK_H
#include <asm/kvm_para.h>
-#include <linux/jump_label.h>
+#include <asm/paravirt.h>
#ifdef CONFIG_PARAVIRT
-DECLARE_STATIC_KEY_FALSE(virt_preempt_key);
-DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
-DECLARE_PER_CPU(struct kvm_steal_time, steal_time);
#define virt_spin_lock virt_spin_lock
diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c
index 8ba391cfabb0..1985ed30dd16 100644
--- a/arch/loongarch/kernel/kprobes.c
+++ b/arch/loongarch/kernel/kprobes.c
@@ -60,16 +60,18 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe);
/* Install breakpoint in text */
void arch_arm_kprobe(struct kprobe *p)
{
- *p->addr = KPROBE_BP_INSN;
- flush_insn_slot(p);
+ u32 insn = KPROBE_BP_INSN;
+
+ larch_insn_text_copy(p->addr, &insn, LOONGARCH_INSN_SIZE);
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
/* Remove breakpoint from text */
void arch_disarm_kprobe(struct kprobe *p)
{
- *p->addr = p->opcode;
- flush_insn_slot(p);
+ u32 insn = p->opcode;
+
+ larch_insn_text_copy(p->addr, &insn, LOONGARCH_INSN_SIZE);
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
@@ -184,16 +186,16 @@ static bool reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
switch (kcb->kprobe_status) {
- case KPROBE_HIT_SS:
case KPROBE_HIT_SSDONE:
case KPROBE_HIT_ACTIVE:
kprobes_inc_nmissed_count(p);
setup_singlestep(p, regs, kcb, 1);
break;
+ case KPROBE_HIT_SS:
case KPROBE_REENTER:
pr_warn("Failed to recover from reentered kprobes.\n");
dump_kprobe(p);
- WARN_ON_ONCE(1);
+ BUG();
break;
default:
WARN_ON(1);
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
index 16f6a9b39659..4b61a9632a98 100644
--- a/arch/loongarch/kernel/relocate.c
+++ b/arch/loongarch/kernel/relocate.c
@@ -134,11 +134,23 @@ early_param("nokaslr", nokaslr);
#define KASLR_DISABLED_MESSAGE "KASLR is disabled by %s in %s cmdline.\n"
+/*
+ * Note: strictly-defined KASLR means the kernel's final runtime address
+ * has a random offset from the kernel's load address, which is implemented
+ * in relocate.c; broadly-defined KALSR means the kernel's final runtime
+ * address has a random offset from the kernel's link address (a.k.a.
+ * VMLINUX_LOAD_ADDRESS), which also include the efistlub implementation,
+ * kexec_file implementation and QEMU direct kernel boot. kaslr_disabled()
+ * return true only means strictly-defined KASLR is disabled.
+ */
static inline __init bool kaslr_disabled(void)
{
char *str;
const char *builtin_cmdline = CONFIG_CMDLINE;
+ if (kaslr_offset())
+ return true; /* KASLR is performed during early boot. */
+
str = strstr(builtin_cmdline, "nokaslr");
if (str == builtin_cmdline || (str > builtin_cmdline && *(str - 1) == ' ')) {
pr_info(KASLR_DISABLED_MESSAGE, "\'nokaslr\'", "built-in");
@@ -210,14 +222,52 @@ static inline void __init *determine_relocation_address(void)
return RELOCATED_KASLR(destination);
}
+static unsigned long __init determine_initrd_address(unsigned long *size)
+{
+ unsigned long start = 0;
+ unsigned long key_length;
+ char *p, *endp, *key = "initrd=";
+
+ key_length = strlen(key);
+ p = strstr(boot_command_line, key);
+
+ if (!p) {
+ key = "initrdmem=";
+ key_length = strlen(key);
+ p = strstr(boot_command_line, key);
+ }
+
+ if (p == boot_command_line || (p > boot_command_line && *(p - 1) == ' ')) {
+ p += key_length;
+ start = memparse(p, &endp);
+ if (*endp == ',')
+ *size = memparse(endp + 1, NULL);
+ }
+
+ return start;
+}
+
static inline int __init relocation_addr_valid(void *location_new)
{
+ unsigned long kernel_start, kernel_size;
+ unsigned long initrd_start, initrd_size = 0;
+
if ((unsigned long)location_new & 0x00000ffff)
return 0; /* Inappropriately aligned new location */
if ((unsigned long)location_new < (unsigned long)_end)
return 0; /* New location overlaps original kernel */
+ initrd_start = determine_initrd_address(&initrd_size);
+ if (initrd_start && initrd_size) {
+ kernel_start = PHYSADDR(location_new);
+ kernel_size = (unsigned long)_end - (unsigned long)_text;
+
+ if (kernel_start < (initrd_start + initrd_size) &&
+ initrd_start < (kernel_start + kernel_size))
+ return 0; /* initrd/initramfs overlaps kernel */
+ }
+
return 1;
}
#endif
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 3f9ab54114c5..031b39eb081c 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -123,11 +123,7 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page = pfn_to_page(start_pfn);
- /* With altmap the first mapped page is offset from @start */
- if (altmap)
- page += vmem_altmap_offset(altmap);
__remove_pages(start_pfn, nr_pages, altmap);
}
#endif
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index b282e0dd8dc1..62543bf305ff 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -3,5 +3,6 @@ generated-y += syscall_table.h
generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += spinlock.h
generic-y += text-patching.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 7178f990e8b3..0030309b47ad 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += parport.h
+generic-y += ring_buffer.h
generic-y += syscalls.h
generic-y += tlb.h
generic-y += user.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 684569b2ecd6..9771c3d85074 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -12,5 +12,6 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += qrwlock.h
generic-y += qspinlock.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += text-patching.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 28004301c236..0a2530964413 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += cmpxchg.h
generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += spinlock.h
generic-y += user.h
generic-y += text-patching.h
diff --git a/arch/nios2/include/asm/linkage.h b/arch/nios2/include/asm/linkage.h
index 211302301a8a..c4073235852b 100644
--- a/arch/nios2/include/asm/linkage.h
+++ b/arch/nios2/include/asm/linkage.h
@@ -12,4 +12,6 @@
#define __ALIGN .align 4
#define __ALIGN_STR ".align 4"
+#define _THIS_IP_ ({ unsigned long __ip; asm volatile("nextpc %0" : "=r" (__ip)); __ip; })
+
#endif
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index cef49d60d74c..8aa34621702d 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -8,4 +8,5 @@ generic-y += spinlock_types.h
generic-y += spinlock.h
generic-y += qrwlock_types.h
generic-y += qrwlock.h
+generic-y += ring_buffer.h
generic-y += user.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 4fb596d94c89..d48d158f7241 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -4,4 +4,5 @@ generated-y += syscall_table_64.h
generic-y += agp.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += user.h
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index f15e5920080b..e8718bc13eeb 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -83,11 +83,10 @@ config MSI_BITMAP_SELFTEST
depends on DEBUG_KERNEL
config GUEST_STATE_BUFFER_TEST
- def_tristate n
+ def_tristate KUNIT_ALL_TESTS
prompt "Enable Guest State Buffer unit tests"
depends on KUNIT
depends on KVM_BOOK3S_HV_POSSIBLE
- default KUNIT_ALL_TESTS
help
The Guest State Buffer is a data format specified in the PAPR.
It is by hcalls to communicate the state of L2 guests between
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index e9996711b362..5ca1676e6058 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -85,6 +85,8 @@ CONFIG_PMAC_SMU=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_WINDFARM=y
CONFIG_WINDFARM_PM81=y
+CONFIG_WINDFARM_PM72=y
+CONFIG_WINDFARM_RM31=y
CONFIG_WINDFARM_PM91=y
CONFIG_WINDFARM_PM112=y
CONFIG_WINDFARM_PM121=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index ccabc6e17168..eda1fec7ffd9 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -393,6 +393,7 @@ CONFIG_NETCONSOLE=m
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
+CONFIG_EL3=m
CONFIG_VORTEX=m
CONFIG_TYPHOON=m
CONFIG_ADAPTEC_STARFIRE=m
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 2e23533b67e3..805b5aeebb6f 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -5,4 +5,5 @@ generated-y += syscall_table_spu.h
generic-y += agp.h
generic-y += mcs_spinlock.h
generic-y += qrwlock.h
+generic-y += ring_buffer.h
generic-y += early_ioremap.h
diff --git a/arch/powerpc/include/asm/pmac_low_i2c.h b/arch/powerpc/include/asm/pmac_low_i2c.h
index 21bd7297c87f..fead8fae08ab 100644
--- a/arch/powerpc/include/asm/pmac_low_i2c.h
+++ b/arch/powerpc/include/asm/pmac_low_i2c.h
@@ -79,10 +79,6 @@ extern int pmac_i2c_match_adapter(struct device_node *dev,
struct i2c_adapter *adapter);
-/* (legacy) Locking functions exposed to i2c-keywest */
-extern int pmac_low_i2c_lock(struct device_node *np);
-extern int pmac_low_i2c_unlock(struct device_node *np);
-
/* Access functions for platform code */
extern int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled);
extern void pmac_i2c_close(struct pmac_i2c_bus *bus);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 4bbeb8644d3d..b4472288e0d4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -458,6 +458,10 @@ DEFINE_PER_CPU(u8, irq_work_pending);
#endif /* 32 vs 64 bit */
+/*
+ * Must be called with preemption disabled since it updates
+ * per-CPU irq_work state and programs the local CPU decrementer.
+ */
void arch_irq_work_raise(void)
{
/*
@@ -471,10 +475,8 @@ void arch_irq_work_raise(void)
* which could get tangled up if we're messing with the same state
* here.
*/
- preempt_disable();
set_irq_work_pending_flag();
set_dec(1);
- preempt_enable();
}
static void set_dec_or_work(u64 val)
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 5cac2cf3bd1e..10c82cf8f5b3 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -210,7 +210,7 @@ static ssize_t processor_bus_topology_show(struct device *dev, struct device_att
0, 0, buf, &n, arg);
if (!ret)
- return n;
+ goto out_success;
if (ret != H_PARAMETER)
goto out;
@@ -244,12 +244,14 @@ static ssize_t processor_bus_topology_show(struct device *dev, struct device_att
starting_index, 0, buf, &n, arg);
if (!ret)
- return n;
+ goto out_success;
if (ret != H_PARAMETER)
goto out;
}
+out_success:
+ put_cpu_var(hv_gpci_reqb);
return n;
out:
@@ -278,7 +280,7 @@ static ssize_t processor_config_show(struct device *dev, struct device_attribute
0, 0, buf, &n, arg);
if (!ret)
- return n;
+ goto out_success;
if (ret != H_PARAMETER)
goto out;
@@ -312,12 +314,14 @@ static ssize_t processor_config_show(struct device *dev, struct device_attribute
starting_index, 0, buf, &n, arg);
if (!ret)
- return n;
+ goto out_success;
if (ret != H_PARAMETER)
goto out;
}
+out_success:
+ put_cpu_var(hv_gpci_reqb);
return n;
out:
@@ -346,7 +350,7 @@ static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
0, 0, buf, &n, arg);
if (!ret)
- return n;
+ goto out_success;
if (ret != H_PARAMETER)
goto out;
@@ -382,12 +386,14 @@ static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
starting_index, secondary_index, buf, &n, arg);
if (!ret)
- return n;
+ goto out_success;
if (ret != H_PARAMETER)
goto out;
}
+out_success:
+ put_cpu_var(hv_gpci_reqb);
return n;
out:
@@ -416,7 +422,7 @@ static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device
0, 0, buf, &n, arg);
if (!ret)
- return n;
+ goto out_success;
if (ret != H_PARAMETER)
goto out;
@@ -448,12 +454,14 @@ static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device
starting_index, 0, buf, &n, arg);
if (!ret)
- return n;
+ goto out_success;
if (ret != H_PARAMETER)
goto out;
}
+out_success:
+ put_cpu_var(hv_gpci_reqb);
return n;
out:
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index a5001d32f978..6f674f86dc85 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -293,6 +293,8 @@ static int pika_dtm_thread(void __iomem *fpga)
schedule_timeout(HZ);
}
+ put_device(&client->dev);
+
return 0;
}
diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c
index 99f0f0f41876..4ad223525e89 100644
--- a/arch/powerpc/platforms/82xx/km82xx.c
+++ b/arch/powerpc/platforms/82xx/km82xx.c
@@ -27,8 +27,8 @@
static void __init km82xx_pic_init(void)
{
- struct device_node *np __free(device_node);
- np = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic");
+ struct device_node *np __free(device_node) = of_find_compatible_node(NULL,
+ NULL, "fsl,pq2-pic");
if (!np) {
pr_err("PIC init: can not find cpm-pic node\n");
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 73b7f4e8c047..da72a30ab865 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -1058,40 +1058,6 @@ int pmac_i2c_match_adapter(struct device_node *dev, struct i2c_adapter *adapter)
}
EXPORT_SYMBOL_GPL(pmac_i2c_match_adapter);
-int pmac_low_i2c_lock(struct device_node *np)
-{
- struct pmac_i2c_bus *bus, *found = NULL;
-
- list_for_each_entry(bus, &pmac_i2c_busses, link) {
- if (np == bus->controller) {
- found = bus;
- break;
- }
- }
- if (!found)
- return -ENODEV;
- return pmac_i2c_open(bus, 0);
-}
-EXPORT_SYMBOL_GPL(pmac_low_i2c_lock);
-
-int pmac_low_i2c_unlock(struct device_node *np)
-{
- struct pmac_i2c_bus *bus, *found = NULL;
-
- list_for_each_entry(bus, &pmac_i2c_busses, link) {
- if (np == bus->controller) {
- found = bus;
- break;
- }
- }
- if (!found)
- return -ENODEV;
- pmac_i2c_close(bus);
- return 0;
-}
-EXPORT_SYMBOL_GPL(pmac_low_i2c_unlock);
-
-
int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled)
{
int rc;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d235396c4514..c5754942cf85 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -937,6 +937,28 @@ config RISCV_VECTOR_MISALIGNED
help
Enable detecting support for vector misaligned loads and stores.
+config RISCV_SBI_FWFT_DELEGATE_MISALIGNED
+ bool "Request firmware delegation of unaligned access exceptions"
+ depends on RISCV_SBI
+ depends on NONPORTABLE
+ help
+ Use SBI FWFT to request delegation of load address misaligned and
+ store address misaligned exceptions, if possible, and prefer Linux
+ kernel emulation of these accesses to firmware emulation.
+
+ Unfortunately, Linux's emulation is still incomplete. Namely, it
+ currently does not handle vector instructions and KVM guest accesses.
+ On platforms where these accesses would have been handled by firmware,
+ enabling this causes unexpected kernel oopses, userspaces crashes and
+ KVM guest crashes. If you are sure that these are not a problem for
+ your platform, you can say Y here, which may improve performance.
+
+ Saying N here will not worsen emulation support for unaligned accesses
+ even in the case where the firmware also has incomplete support. It
+ simply keeps the firmware's emulation enabled.
+
+ If you don't know what to do here, say N.
+
choice
prompt "Unaligned Accesses Support"
default RISCV_PROBE_UNALIGNED_ACCESS
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi
index 2d14e92f068d..9078e5b1e49c 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi
@@ -101,16 +101,6 @@
status = "okay";
};
-&i2c0 {
- pinctrl-names = "default";
- pinctrl-0 = <&i2c0_fabric>;
-};
-
-&i2c1 {
- pinctrl-names = "default";
- pinctrl-0 = <&i2c1_mssio>;
-};
-
&mmuart1 {
pinctrl-names = "default";
pinctrl-0 = <&uart1_fabric>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts
index 8afedece89d1..636493f6584d 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts
@@ -14,6 +14,16 @@
"microchip,mpfs";
};
+&i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_fabric>;
+};
+
+&i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_mssio>;
+};
+
&syscontroller {
microchip,bitstream-flash = <&sys_ctrl_flash>;
};
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
index 556aa9638282..6fadce815c9a 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
@@ -11,3 +11,22 @@
"microchip,mpfs-icicle-kit",
"microchip,mpfs";
};
+
+&i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_fabric>;
+};
+
+/*
+ * Due to silicon errata, routing via MSS IOs doesn't work on ES devices.
+ * Instead, i2c1, appearing on B1/C1, which are normally MSS IOs, is routed
+ * via the fabric and back to B1/C1 via "fabric-test" functionality.
+ * This is done silently by Libero, so the iomux0 setting for i2c1 has to
+ * be fabric IO, despite tooling etc saying that MSS IOs are used.
+ *
+ * See Section 3.3 of https://ww1.microchip.com/downloads/aemDocuments/documents/FPGA/ProductDocuments/Errata/polarfiresoc/microsemi_polarfire_soc_fpga_egineering_samples_errata_er0219_v1.pdf
+ */
+&i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_fabric>;
+};
diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
index 8cfe8033305d..a7a1c09a2c90 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
@@ -135,29 +135,6 @@
clock-frequency = <49152000>;
};
-&camss {
- assigned-clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>,
- <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>;
- assigned-clock-rates = <49500000>, <198000000>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- };
-
- port@1 {
- reg = <1>;
-
- camss_from_csi2rx: endpoint {
- remote-endpoint = <&csi2rx_to_camss>;
- };
- };
- };
-};
-
&csi2rx {
assigned-clocks = <&ispcrg JH7110_ISPCLK_VIN_SYS>;
assigned-clock-rates = <297000000>;
@@ -175,9 +152,7 @@
port@1 {
reg = <1>;
- csi2rx_to_camss: endpoint {
- remote-endpoint = <&camss_from_csi2rx>;
- };
+ /* remote CAMSS endpoint */
};
};
};
diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi
index 6e56e9d20bb0..9c3e4598747e 100644
--- a/arch/riscv/boot/dts/starfive/jh7110.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi
@@ -1199,34 +1199,6 @@
#phy-cells = <0>;
};
- camss: isp@19840000 {
- compatible = "starfive,jh7110-camss";
- reg = <0x0 0x19840000 0x0 0x10000>,
- <0x0 0x19870000 0x0 0x30000>;
- reg-names = "syscon", "isp";
- clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>,
- <&ispcrg JH7110_ISPCLK_ISPV2_TOP_WRAPPER_C>,
- <&ispcrg JH7110_ISPCLK_DVP_INV>,
- <&ispcrg JH7110_ISPCLK_VIN_P_AXI_WR>,
- <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>,
- <&syscrg JH7110_SYSCLK_ISP_TOP_CORE>,
- <&syscrg JH7110_SYSCLK_ISP_TOP_AXI>;
- clock-names = "apb_func", "wrapper_clk_c", "dvp_inv",
- "axiwr", "mipi_rx0_pxl", "ispcore_2x",
- "isp_axi";
- resets = <&ispcrg JH7110_ISPRST_ISPV2_TOP_WRAPPER_P>,
- <&ispcrg JH7110_ISPRST_ISPV2_TOP_WRAPPER_C>,
- <&ispcrg JH7110_ISPRST_VIN_P_AXI_RD>,
- <&ispcrg JH7110_ISPRST_VIN_P_AXI_WR>,
- <&syscrg JH7110_SYSRST_ISP_TOP>,
- <&syscrg JH7110_SYSRST_ISP_TOP_AXI>;
- reset-names = "wrapper_p", "wrapper_c", "axird",
- "axiwr", "isp_top_n", "isp_top_axi";
- power-domains = <&pwrc JH7110_PD_ISP>;
- interrupts = <92>, <87>, <90>, <88>;
- status = "disabled";
- };
-
voutcrg: clock-controller@295c0000 {
compatible = "starfive,jh7110-voutcrg";
reg = <0x0 0x295c0000 0x0 0x10000>;
diff --git a/arch/riscv/errata/mips/errata.c b/arch/riscv/errata/mips/errata.c
index e984a8152208..2c3dc2259e93 100644
--- a/arch/riscv/errata/mips/errata.c
+++ b/arch/riscv/errata/mips/errata.c
@@ -57,7 +57,7 @@ void mips_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
}
tmp = (1U << alt->patch_id);
- if (cpu_req_errata && tmp) {
+ if (cpu_req_errata & tmp) {
mutex_lock(&text_mutex);
patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt),
alt->alt_len);
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index bd5fc9403295..7721b63642f4 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -14,5 +14,6 @@ generic-y += ticket_spinlock.h
generic-y += qrwlock.h
generic-y += qrwlock_types.h
generic-y += qspinlock.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += vmlinux.lds.h
diff --git a/arch/riscv/kernel/compat_signal.c b/arch/riscv/kernel/compat_signal.c
index 6ec4e34255a9..cf3eb33a11e4 100644
--- a/arch/riscv/kernel/compat_signal.c
+++ b/arch/riscv/kernel/compat_signal.c
@@ -107,6 +107,8 @@ static long compat_restore_sigcontext(struct pt_regs *regs,
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_from_user(&cregs, &sc->sc_regs, sizeof(sc->sc_regs));
+ if (unlikely(err))
+ return err;
cregs_to_regs(&cregs, regs);
diff --git a/arch/riscv/kernel/copy-unaligned.S b/arch/riscv/kernel/copy-unaligned.S
index 2b3d9398c113..90f3549621f7 100644
--- a/arch/riscv/kernel/copy-unaligned.S
+++ b/arch/riscv/kernel/copy-unaligned.S
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2023 Rivos Inc. */
+#include <linux/cfi_types.h>
#include <linux/linkage.h>
#include <asm/asm.h>
@@ -9,7 +10,7 @@
/* void __riscv_copy_words_unaligned(void *, const void *, size_t) */
/* Performs a memcpy without aligning buffers, using word loads and stores. */
/* Note: The size is truncated to a multiple of 8 * SZREG */
-SYM_FUNC_START(__riscv_copy_words_unaligned)
+SYM_TYPED_FUNC_START(__riscv_copy_words_unaligned)
andi a4, a2, ~((8*SZREG)-1)
beqz a4, 2f
add a3, a1, a4
@@ -41,7 +42,7 @@ SYM_FUNC_END(__riscv_copy_words_unaligned)
/* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */
/* Performs a memcpy without aligning buffers, using only byte accesses. */
/* Note: The size is truncated to a multiple of 8 */
-SYM_FUNC_START(__riscv_copy_bytes_unaligned)
+SYM_TYPED_FUNC_START(__riscv_copy_bytes_unaligned)
andi a4, a2, ~(8-1)
beqz a4, 2f
add a3, a1, a4
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 1734f9a4c2fd..f46aa5602d74 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -896,10 +896,8 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
* CPU cores with the ratified spec will contain non-zero
* marchid.
*/
- if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0) {
- this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v];
+ if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0)
clear_bit(RISCV_ISA_EXT_v, source_isa);
- }
riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap);
@@ -1104,16 +1102,16 @@ early_param("riscv_isa_fallback", riscv_isa_fallback_setup);
void __init riscv_fill_hwcap(void)
{
char print_str[NUM_ALPHA_EXTS + 1];
- unsigned long isa2hwcap[26] = {0};
+ unsigned long isa2hwcap[RISCV_ISA_EXT_BASE] = {0};
int i, j;
- isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
- isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
- isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A;
- isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F;
- isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D;
- isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C;
- isa2hwcap['v' - 'a'] = COMPAT_HWCAP_ISA_V;
+ isa2hwcap[RISCV_ISA_EXT_i] = COMPAT_HWCAP_ISA_I;
+ isa2hwcap[RISCV_ISA_EXT_m] = COMPAT_HWCAP_ISA_M;
+ isa2hwcap[RISCV_ISA_EXT_a] = COMPAT_HWCAP_ISA_A;
+ isa2hwcap[RISCV_ISA_EXT_f] = COMPAT_HWCAP_ISA_F;
+ isa2hwcap[RISCV_ISA_EXT_d] = COMPAT_HWCAP_ISA_D;
+ isa2hwcap[RISCV_ISA_EXT_c] = COMPAT_HWCAP_ISA_C;
+ isa2hwcap[RISCV_ISA_EXT_v] = COMPAT_HWCAP_ISA_V;
if (!acpi_disabled) {
riscv_fill_hwcap_from_isa_string(isa2hwcap);
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 93de2e7a3074..793bcee46182 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -577,8 +577,8 @@ static int compat_riscv_gpr_set(struct task_struct *target,
struct compat_user_regs_struct cregs;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &cregs, 0, -1);
-
- cregs_to_regs(&cregs, task_pt_regs(target));
+ if (!ret)
+ cregs_to_regs(&cregs, task_pt_regs(target));
return ret;
}
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 2a27d3ff4ac6..81b7682e6c6d 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -584,7 +584,7 @@ static int cpu_online_check_unaligned_access_emulated(unsigned int cpu)
static bool misaligned_traps_delegated;
-#ifdef CONFIG_RISCV_SBI
+#if defined(CONFIG_RISCV_SBI_FWFT_DELEGATE_MISALIGNED)
static int cpu_online_sbi_unaligned_setup(unsigned int cpu)
{
diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c
index 6eaa0d94fdfe..cbfb4e495e9f 100644
--- a/arch/riscv/kernel/usercfi.c
+++ b/arch/riscv/kernel/usercfi.c
@@ -109,15 +109,16 @@ void set_indir_lp_lock(struct task_struct *task, bool lock)
task->thread_info.user_cfi_state.ufcfi_locked = lock;
}
/*
- * If size is 0, then to be compatible with regular stack we want it to be as big as
- * regular stack. Else PAGE_ALIGN it and return back
+ * The shadow stack only stores the return address and not any variables
+ * this should be more than sufficient for most applications.
+ * Else PAGE_ALIGN it and return back
*/
static unsigned long calc_shstk_size(unsigned long size)
{
if (size)
return PAGE_ALIGN(size);
- return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
+ return PAGE_ALIGN(min(rlimit(RLIMIT_STACK) / 2, SZ_2G));
}
/*
diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S
index 7ce4de6f6e69..361039f7b944 100644
--- a/arch/riscv/kernel/vec-copy-unaligned.S
+++ b/arch/riscv/kernel/vec-copy-unaligned.S
@@ -2,6 +2,7 @@
/* Copyright (C) 2024 Rivos Inc. */
#include <linux/args.h>
+#include <linux/cfi_types.h>
#include <linux/linkage.h>
#include <asm/asm.h>
@@ -16,7 +17,7 @@
/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */
/* Performs a memcpy without aligning buffers, using word loads and stores. */
/* Note: The size is truncated to a multiple of WORD_EEW */
-SYM_FUNC_START(__riscv_copy_vec_words_unaligned)
+SYM_TYPED_FUNC_START(__riscv_copy_vec_words_unaligned)
andi a4, a2, ~(WORD_EEW-1)
beqz a4, 2f
add a3, a1, a4
@@ -38,7 +39,7 @@ SYM_FUNC_END(__riscv_copy_vec_words_unaligned)
/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */
/* Performs a memcpy without aligning buffers, using only byte accesses. */
/* Note: The size is truncated to a multiple of 8 */
-SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned)
+SYM_TYPED_FUNC_START(__riscv_copy_vec_bytes_unaligned)
andi a4, a2, ~(8-1)
beqz a4, 2f
add a3, a1, a4
diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c
index 4d89b94128ae..f09f9251d1f0 100644
--- a/arch/riscv/kvm/vcpu_insn.c
+++ b/arch/riscv/kvm/vcpu_insn.c
@@ -415,7 +415,6 @@ int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
shift = 8 * (sizeof(ulong) - len);
} else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
len = 1;
- shift = 8 * (sizeof(ulong) - len);
#ifdef CONFIG_64BIT
} else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) {
len = 8;
@@ -649,22 +648,22 @@ int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
case 1:
data8 = *((u8 *)run->mmio.data);
SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data8 << shift >> shift);
+ (long)((ulong)data8 << shift) >> shift);
break;
case 2:
data16 = *((u16 *)run->mmio.data);
SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data16 << shift >> shift);
+ (long)((ulong)data16 << shift) >> shift);
break;
case 4:
data32 = *((u32 *)run->mmio.data);
SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data32 << shift >> shift);
+ (long)((ulong)data32 << shift) >> shift);
break;
case 8:
data64 = *((u64 *)run->mmio.data);
SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data64 << shift >> shift);
+ (long)((ulong)data64 << shift) >> shift);
break;
default:
return -EOPNOTSUPP;
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index a935ed96bc17..bb46dcbfb24d 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -453,8 +453,10 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
}
kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
- if (!kvpmu->sdata)
- return -ENOMEM;
+ if (!kvpmu->sdata) {
+ sbiret = SBI_ERR_FAILURE;
+ goto out;
+ }
/* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
@@ -499,8 +501,10 @@ int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low
}
einfo = kzalloc(shmem_size, GFP_KERNEL);
- if (!einfo)
- return -ENOMEM;
+ if (!einfo) {
+ ret = SBI_ERR_FAILURE;
+ goto out;
+ }
ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
if (ret) {
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index 3b834709b429..60e50296a008 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -46,7 +46,7 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
gfn = shmem >> PAGE_SHIFT;
hva = kvm_vcpu_gfn_to_hva(vcpu, gfn);
- if (WARN_ON(kvm_is_error_hva(hva))) {
+ if (kvm_is_error_hva(hva)) {
vcpu->arch.sta.shmem = INVALID_GPA;
return;
}
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
index 188d5ea5b3b8..c9c323d4577a 100644
--- a/arch/riscv/kvm/vcpu_sbi_v01.c
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -55,6 +55,8 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
for_each_set_bit(i, &hmask, BITS_PER_LONG) {
rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ if (!rvcpu)
+ continue;
ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
if (ret < 0)
break;
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index decd7df40fa4..fa8d2f6f554b 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -792,6 +792,27 @@ static void __init set_mmap_rnd_bits_max(void)
mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3;
}
+static bool __init is_vaddr_valid(unsigned long va)
+{
+ unsigned long up = 0;
+
+ switch (satp_mode) {
+ case SATP_MODE_39:
+ up = 1UL << 38;
+ break;
+ case SATP_MODE_48:
+ up = 1UL << 47;
+ break;
+ case SATP_MODE_57:
+ up = 1UL << 56;
+ break;
+ default:
+ return false;
+ }
+
+ return (va < up) || (va >= (ULONG_MAX - up + 1));
+}
+
/*
* There is a simple way to determine if 4-level is supported by the
* underlying hardware: establish 1:1 mapping in 4-level page table mode
@@ -833,6 +854,9 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
set_satp_mode_pmd + PMD_SIZE,
PMD_SIZE, PAGE_KERNEL_EXEC);
retry:
+ if (!is_vaddr_valid(set_satp_mode_pmd))
+ goto out;
+
create_pgd_mapping(early_pg_dir,
set_satp_mode_pmd,
pgtable_l5_enabled ?
@@ -855,6 +879,7 @@ retry:
disable_pgtable_l4();
}
+out:
memset(early_pg_dir, 0, PAGE_SIZE);
memset(early_p4d, 0, PAGE_SIZE);
memset(early_pud, 0, PAGE_SIZE);
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 80bad7de7a04..0c1fc47c3ba0 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,3 +7,4 @@ generated-y += unistd_nr.h
generic-y += asm-offsets.h
generic-y += mcs_spinlock.h
generic-y += mmzone.h
+generic-y += ring_buffer.h
diff --git a/arch/s390/kernel/perf_pai.c b/arch/s390/kernel/perf_pai.c
index 86f71a3d1ef2..cdb8006220ca 100644
--- a/arch/s390/kernel/perf_pai.c
+++ b/arch/s390/kernel/perf_pai.c
@@ -186,6 +186,13 @@ static u64 pai_getctr(unsigned long *page, int nr, unsigned long offset)
return page[nr];
}
+static void pai_setctr(unsigned long *page, int nr, unsigned long offset, u64 v)
+{
+ if (offset)
+ nr += offset / sizeof(*page);
+ page[nr] = v;
+}
+
/* Read the counter values. Return value from location in CMP. For base
* event xxx_ALL sum up all events. Returns counter value.
*/
@@ -551,6 +558,8 @@ static void paicrypt_del(struct perf_event *event, int flags)
/* Create raw data and save it in buffer. Calculate the delta for each
* counter between this invocation and the last invocation.
* Returns number of bytes copied.
+ * After reading from PAI counter page, save the read value to the old
+ * page to calculate PAI counter deltas.
* Saves only entries with positive counter difference of the form
* 2 bytes: Number of counter
* 8 bytes: Value of counter
@@ -562,16 +571,22 @@ static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page,
int i, outidx = 0;
for (i = 1; i <= pp->num_avail; i++) {
- u64 val = 0, val_old = 0;
+ u64 val = 0, val_old = 0, val_k = 0, val_old_k = 0;
if (!exclude_kernel) {
- val += pai_getctr(page, i, pp->kernel_offset);
- val_old += pai_getctr(page_old, i, pp->kernel_offset);
+ val_k = pai_getctr(page, i, pp->kernel_offset);
+ val_old_k = pai_getctr(page_old, i, pp->kernel_offset);
+ if (val_k != val_old_k)
+ pai_setctr(page_old, i, pp->kernel_offset, val_k);
}
if (!exclude_user) {
- val += pai_getctr(page, i, 0);
- val_old += pai_getctr(page_old, i, 0);
+ val = pai_getctr(page, i, 0);
+ val_old = pai_getctr(page_old, i, 0);
+ if (val != val_old)
+ pai_setctr(page_old, i, 0, val);
}
+ val += val_k;
+ val_old += val_old_k;
if (val >= val_old)
val -= val_old;
else
@@ -602,8 +617,6 @@ static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page,
static int pai_push_sample(size_t rawsize, struct pai_map *cpump,
struct perf_event *event)
{
- int idx = PAI_PMU_IDX(event);
- struct pai_pmu *pp = &pai_pmu[idx];
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
@@ -634,8 +647,6 @@ static int pai_push_sample(size_t rawsize, struct pai_map *cpump,
overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
- /* Save crypto counter lowcore page after reading event data. */
- memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size);
return overflow;
}
@@ -651,7 +662,7 @@ static void pai_have_sample(struct perf_event *event, struct pai_map *cpump)
rawsize = pai_copy(cpump->save, cpump->area, pp,
(unsigned long *)PAI_SAVE_AREA(event),
event->attr.exclude_user,
- event->attr.exclude_kernel);
+ !pp->kernel_offset ? true : event->attr.exclude_kernel);
if (rawsize) /* No incremented counters */
pai_push_sample(rawsize, cpump, event);
}
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 1913a5566ac2..1377c6f3f670 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -192,17 +192,21 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
+ /*
+ * Adjust drawer_id, book_id, and socked_id so they match the
+ * numbering scheme of e.g. the hardware management console.
+ */
case 3:
drawer = drawer->next;
- drawer->id = tle->container.id;
+ drawer->id = tle->container.id - 1;
break;
case 2:
book = book->next;
- book->id = tle->container.id;
+ book->id = tle->container.id - 1;
break;
case 1:
socket = socket->next;
- socket->id = tle->container.id;
+ socket->id = tle->container.id - 1;
break;
case 0:
add_cpus_to_mask(&tle->cpu, drawer, book, socket);
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 7b8d70fe406d..4a41c0247ffa 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -267,6 +267,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
/* No need to take locks as the page table is not installed yet. */
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
pgste_init.vsie_notif = old.s.fc1.vsie_notif;
+ pgste_init.vsie_gmem = old.s.fc1.vsie_notif;
pgste_init.pcl = uses_skeys && init.h.i;
dat_init_pgstes(pt, pgste_init.val);
} else {
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 8f8278c44879..873e13ac5a27 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -145,7 +145,8 @@ union pgste {
unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 5;
+ unsigned long vsie_gmem : 1; /* Contains nested guest memory */
+ unsigned long : 4;
unsigned long : 8;
};
struct {
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index b07accd19618..4f8d5592c9a9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1445,6 +1445,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
} else {
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
pgste.vsie_notif = 1;
+ pgste.vsie_gmem = 1;
}
pgste_set_unlock(ptep_h, pgste);
if (rc)
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 3c26e35af0ef..957126ab991c 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -125,7 +125,7 @@ struct gmap *gmap_new_child(struct gmap *parent, gfn_t limit)
int gmap_set_limit(struct gmap *gmap, gfn_t limit)
{
- struct kvm_s390_mmu_cache *mc;
+ struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
int rc, type;
type = gmap_limit_to_type(limit);
@@ -142,7 +142,6 @@ int gmap_set_limit(struct gmap *gmap, gfn_t limit)
rc = dat_set_asce_limit(mc, &gmap->asce, type);
} while (rc == -ENOMEM);
- kvm_s390_free_mmu_cache(mc);
return 0;
}
@@ -822,8 +821,8 @@ int gmap_ucas_translate(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gpa_t
int gmap_ucas_map(struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, unsigned long count)
{
- struct kvm_s390_mmu_cache *mc;
- int rc;
+ struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
+ int rc = 0;
mc = kvm_s390_new_mmu_cache();
if (!mc)
@@ -1026,13 +1025,15 @@ int gmap_insert_rmap(struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, int level)
int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn,
kvm_pfn_t pfn, int level, bool wr)
{
+ unsigned long bitmask;
union crste *crstep;
union pgste pgste;
union pte *ptep;
union pte pte;
int flags, rc;
- KVM_BUG_ON(!is_shadow(sg), sg->kvm);
+ if (KVM_BUG_ON(!is_shadow(sg) || level <= TABLE_TYPE_PAGE_TABLE, sg->kvm))
+ return -EINVAL;
lockdep_assert_held(&sg->parent->children_lock);
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
@@ -1041,8 +1042,9 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf
if (rc)
return rc;
if (level <= TABLE_TYPE_REGION1) {
+ bitmask = -1UL << (8 + 11 * level);
scoped_guard(spinlock, &sg->host_to_rmap_lock)
- rc = gmap_insert_rmap(sg, p_gfn, r_gfn, level);
+ rc = gmap_insert_rmap(sg, p_gfn, r_gfn & bitmask, level);
}
if (rc)
return rc;
@@ -1143,8 +1145,10 @@ void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn)
}
scoped_guard(spinlock, &sg->host_to_rmap_lock)
head = radix_tree_delete(&sg->host_to_rmap, gfn);
- gmap_for_each_rmap_safe(rmap, rnext, head)
+ gmap_for_each_rmap_safe(rmap, rnext, head) {
gmap_unshadow_level(sg, rmap->r_gfn, rmap->level);
+ kfree(rmap);
+ }
}
}
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index 96ee1395a592..742e42a31744 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -167,6 +167,36 @@ static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
return _gmap_unmap_prefix(gmap, gfn, end, false);
}
+/**
+ * pte_needs_unshadow() -- Check if the pte operations triggers unshadowing.
+ * @oldpte: the previous value for the guest pte.
+ * @newpte: the new pte being set.
+ * @pgste: the pgste for the pte entry.
+ *
+ * If the pgste.vsie_notif bit is not set, return false: the page is not
+ * involved in vsie and thus should not trigger an unshadow operation.
+ *
+ * If the pgste.vsie_gmem bit is set, this pte represents shadowed guest
+ * memory. The access rights on g3's memory should be synchronized with g1's
+ * and g2's. Therefore unshadowing is triggered if the new and old pte
+ * differ in protection, or if the new pte is invalid.
+ *
+ * If the pgste.vsie_gmem bit is not set, this pte maps the g2 dat tables
+ * for g3. If the entry becomes writable or absent, it becomes impossible to
+ * guarantee that the shadow mapping will match g2's mapping. In that case,
+ * trigger an unshadow event.
+ *
+ * Return: true if an unshadow event should be triggered, otherwise false.
+ */
+static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste)
+{
+ if (!pgste.vsie_notif)
+ return false;
+ if (pgste.vsie_gmem)
+ return (oldpte.h.p != newpte.h.p) || newpte.h.i;
+ return !newpte.h.p || !newpte.s.pr;
+}
+
static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
union pgste pgste, gfn_t gfn, bool needs_lock)
{
@@ -180,8 +210,9 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
pgste.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + 1);
}
- if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) {
+ if (pte_needs_unshadow(*ptep, newpte, pgste)) {
pgste.vsie_notif = 0;
+ pgste.vsie_gmem = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
@@ -189,6 +220,7 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
}
if (!ptep->s.d && newpte.s.d && !newpte.s.s)
SetPageDirty(pfn_to_page(newpte.h.pfra));
+ pgste.zero = 0;
return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap));
}
@@ -198,6 +230,30 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
}
+/**
+ * crste_needs_unshadow() -- Check if the crste operations triggers unshadowing.
+ * @oldcrste: the previous value for the crste.
+ * @newcrste: the new value for the crste.
+ *
+ * If the old crste did not have the vsie_notif bit set, return false: the
+ * page is not involved in vsie and thus should not trigger an unshadow
+ * operation. Conversely, if the bit is set, it can only be g3 memory, since
+ * dat tables are never mapped using large pages.
+ *
+ * Similar to the pgste.vsie_gmem case of pte_needs_unshadow(), if the
+ * protection bit is changing or the new page is invalid, trigger an
+ * unshadow event. Also trigger an unshadow event if the new crste does not
+ * have the vsie_notif bit set.
+ *
+ * Return: true if an unshadow event should be triggered, otherwise false.
+ */
+static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste)
+{
+ if (!oldcrste.s.fc1.vsie_notif)
+ return false;
+ return (newcrste.h.p != oldcrste.h.p) || newcrste.h.i || !newcrste.s.fc1.vsie_notif;
+}
+
static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
union crste oldcrste, union crste newcrste,
gfn_t gfn, bool needs_lock)
@@ -216,8 +272,7 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio
newcrste.s.fc1.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
- if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
- (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
+ if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) {
newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 07f59c3b9a7b..3bcdbbbb6891 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3310,8 +3310,7 @@ static void aen_host_forward(unsigned long si)
struct zpci_gaite *gaite;
struct kvm *kvm;
- gaite = (struct zpci_gaite *)aift->gait +
- (si * sizeof(struct zpci_gaite));
+ gaite = aift->gait + si;
if (gaite->count == 0)
return;
if (gaite->aisb != 0)
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 86d93e8dddae..5b075c38998e 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -166,7 +166,7 @@ static int kvm_zpci_set_airq(struct zpci_dev *zdev)
fib.fmt0.noi = airq_iv_end(zdev->aibv);
fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
fib.fmt0.aibvo = 0;
- fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib.fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8;
fib.fmt0.aisbo = zdev->aisb & 63;
fib.gd = zdev->gisa;
@@ -290,8 +290,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
phys_to_virt(fib->fmt0.aibv));
spin_lock_irq(&aift->gait_lock);
- gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
- sizeof(struct zpci_gaite));
+ gaite = aift->gait + zdev->aisb;
/* If assist not requested, host will get all alerts */
if (assist)
@@ -309,7 +308,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
/* Update guest FIB for re-issue */
fib->fmt0.aisbo = zdev->aisb & 63;
- fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib->fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8;
fib->fmt0.isc = gisc;
/* Save some guest fib values in the host for later use */
@@ -357,8 +356,7 @@ static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
if (zdev->kzdev->fib.fmt0.aibv == 0)
goto out;
spin_lock_irq(&aift->gait_lock);
- gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
- sizeof(struct zpci_gaite));
+ gaite = aift->gait + zdev->aisb;
isc = gaite->gisc;
gaite->count--;
if (gaite->count == 0) {
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 4d3f10ed8275..f0403d3ee8ab 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -3,4 +3,5 @@ generated-y += syscall_table.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += parport.h
+generic-y += ring_buffer.h
generic-y += text-patching.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 17ee8a273aa6..49c6bb326b75 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -4,4 +4,5 @@ generated-y += syscall_table_64.h
generic-y += agp.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += text-patching.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 1b9b82bbe322..2a1629ba8140 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -17,6 +17,7 @@ generic-y += module.lds.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
+generic-y += ring_buffer.h
generic-y += runtime-const.h
generic-y += softirq_stack.h
generic-y += switch_to.h
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 72cae8e0ce85..83b4762d6ecb 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
CFLAGS_syscall_32.o += -fno-stack-protector
CFLAGS_syscall_64.o += -fno-stack-protector
-obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
+obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o common.o
obj-y += vdso/
obj-y += vsyscall/
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
new file mode 100644
index 000000000000..06c7c6ebd6f9
--- /dev/null
+++ b/arch/x86/entry/common.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/entry-common.h>
+#include <linux/kvm_types.h>
+#include <linux/hrtimer_rearm.h>
+#include <asm/fred.h>
+#include <asm/desc.h>
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * On VMX, NMIs and IRQs (as configured by KVM) are acknowledged by hardware as
+ * part of the VM-Exit, i.e. the event itself is consumed as part the VM-Exit.
+ * x86_entry_from_kvm() is invoked by KVM to effectively forward NMIs and IRQs
+ * to the kernel for servicing. On SVM, a.k.a. AMD, the NMI/IRQ VM-Exit is
+ * purely a signal that an NMI/IRQ is pending, i.e. the event that triggered
+ * the VM-Exit is held pending until it's unblocked in the host.
+ */
+noinstr void x86_entry_from_kvm(unsigned int event_type, unsigned int vector)
+{
+ if (event_type == EVENT_TYPE_EXTINT) {
+#ifdef CONFIG_X86_64
+ /*
+ * Use FRED dispatch, even when running IDT. The dispatch
+ * tables are kept in sync between FRED and IDT, and the FRED
+ * dispatch works well with CFI.
+ */
+ fred_entry_from_kvm(event_type, vector);
+#else
+ idt_entry_from_kvm(vector);
+#endif
+ /*
+ * Strictly speaking, only the NMI path requires noinstr.
+ */
+ instrumentation_begin();
+ /*
+ * KVM/VMX will dispatch from IRQ-disabled but for a context
+ * that will have IRQs-enabled. This confuses the entry code
+ * and it will not have reprogrammed the timer. Do so now.
+ */
+ hrtimer_rearm_deferred();
+ instrumentation_end();
+
+ return;
+ }
+
+ WARN_ON_ONCE(event_type != EVENT_TYPE_NMI);
+
+#ifdef CONFIG_X86_64
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ return fred_entry_from_kvm(event_type, vector);
+#endif
+
+ /*
+ * Notably, we must use IDT dispatch for NMI when running in IDT mode.
+ * The FRED NMI context is significantly different and will not work
+ * right (specifically FRED fixed the NMI recursion issue).
+ */
+ idt_entry_from_kvm(vector);
+}
+EXPORT_SYMBOL_FOR_KVM(x86_entry_from_kvm);
+#endif
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 6ba2b3adcef0..a56e043b266d 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -75,3 +75,49 @@ THUNK warn_thunk_thunk, __warn_thunk
#if defined(CONFIG_STACKPROTECTOR) && defined(CONFIG_SMP)
EXPORT_SYMBOL(__ref_stack_chk_guard);
#endif
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+.macro IDT_DO_EVENT_IRQOFF call_insn call_target
+ /*
+ * Unconditionally create a stack frame, getting the correct RSP on the
+ * stack (for x86-64) would take two instructions anyways, and RBP can
+ * be used to restore RSP to make objtool happy (see below).
+ */
+ push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
+
+#ifdef CONFIG_X86_64
+ /*
+ * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
+ * creating the synthetic interrupt stack frame for the IRQ/NMI.
+ */
+ and $-16, %rsp
+ push $__KERNEL_DS
+ push %rbp
+#endif
+ pushf
+ push $__KERNEL_CS
+ \call_insn \call_target
+
+ /*
+ * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+ * the correct value. objtool doesn't know the callee will IRET and,
+ * without the explicit restore, thinks the stack is getting walloped.
+ * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+ */
+ leave
+ RET
+.endm
+
+.pushsection .text, "ax"
+SYM_FUNC_START(idt_do_interrupt_irqoff)
+ IDT_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
+SYM_FUNC_END(idt_do_interrupt_irqoff)
+.popsection
+
+.pushsection .noinstr.text, "ax"
+SYM_FUNC_START(idt_do_nmi_irqoff)
+ IDT_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(idt_do_nmi_irqoff)
+.popsection
+#endif
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index 894f7f16eb80..0d2768ab836c 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -147,5 +147,4 @@ SYM_FUNC_START(asm_fred_entry_from_kvm)
RET
SYM_FUNC_END(asm_fred_entry_from_kvm)
-EXPORT_SYMBOL_FOR_KVM(asm_fred_entry_from_kvm);
#endif
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index a6bfcc8243cd..d903bce24f15 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -178,7 +178,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
do_munmap(mm, text_start, image->size, NULL);
- do_munmap(mm, addr, image->size, NULL);
+ do_munmap(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, NULL);
goto up_fail;
}
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4566000e15c4..078fd2c0d69d 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -14,3 +14,4 @@ generic-y += early_ioremap.h
generic-y += fprobe.h
generic-y += mcs_spinlock.h
generic-y += mmzone.h
+generic-y += ring_buffer.h
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index ec95fe44fa3a..00aeae843529 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -438,6 +438,10 @@ extern void idt_setup_traps(void);
extern void idt_setup_apic_and_irq_gates(void);
extern bool idt_is_f00f_address(unsigned long address);
+extern void idt_do_interrupt_irqoff(unsigned long address);
+extern void idt_do_nmi_irqoff(void);
+extern void idt_entry_from_kvm(unsigned int vector);
+
#ifdef CONFIG_X86_64
extern void idt_setup_early_pf(void);
#else
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 7e6b9314758a..2f2ce8aadf07 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -145,7 +145,7 @@ struct gate_struct {
typedef struct gate_struct gate_desc;
#ifndef _SETUP
-static inline unsigned long gate_offset(const gate_desc *g)
+static __always_inline unsigned long gate_offset(const gate_desc *g)
{
#ifdef CONFIG_X86_64
return g->offset_low | ((unsigned long)g->offset_middle << 16) |
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7535131c711b..eca24b5e07f4 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -97,4 +97,6 @@ static __always_inline void arch_exit_to_user_mode(void)
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
+extern void x86_entry_from_kvm(unsigned int entry_type, unsigned int vector);
+
#endif
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 2bb65677c079..18a2f811c358 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -110,7 +110,6 @@ static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { ret
static inline void cpu_init_fred_exceptions(void) { }
static inline void cpu_init_fred_rsps(void) { }
static inline void fred_complete_exception_setup(void) { }
-static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
static inline void fred_sync_rsp0(unsigned long rsp0) { }
static inline void fred_update_rsp0(void) { }
#endif /* CONFIG_X86_FRED */
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index d7c8ef1e354d..be4c5e9e5ff6 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -88,19 +88,19 @@ static void amd_set_max_freq_ratio(void)
rc = cppc_get_perf_caps(0, &perf_caps);
if (rc) {
- pr_warn("Could not retrieve perf counters (%d)\n", rc);
+ pr_debug("Could not retrieve perf counters (%d)\n", rc);
return;
}
rc = amd_get_boost_ratio_numerator(0, &numerator);
if (rc) {
- pr_warn("Could not retrieve highest performance (%d)\n", rc);
+ pr_debug("Could not retrieve highest performance (%d)\n", rc);
return;
}
nominal_perf = perf_caps.nominal_perf;
if (!nominal_perf) {
- pr_warn("Could not retrieve nominal performance\n");
+ pr_debug("Could not retrieve nominal performance\n");
return;
}
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 146f6f8b0650..99801e844b30 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -92,6 +92,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_FRED, X86_FEATURE_LKGS },
{ X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL },
{ X86_FEATURE_LASS, X86_FEATURE_SMAP },
+ { X86_FEATURE_INVLPGB, X86_FEATURE_PCID },
{}
};
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 8dd424ac5de8..f3a793e3a6c8 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -90,7 +90,6 @@ struct mca_config mca_cfg __read_mostly = {
};
static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
-static unsigned long mce_need_notify;
/*
* MCA banks polled by the period polling timer for corrected events.
@@ -152,8 +151,10 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm);
void mce_log(struct mce_hw_err *err)
{
- if (mce_gen_pool_add(err))
+ if (mce_gen_pool_add(err)) {
+ pr_info(HW_ERR "Machine check events logged\n");
irq_work_queue(&mce_irq_work);
+ }
}
EXPORT_SYMBOL_GPL(mce_log);
@@ -585,28 +586,6 @@ bool mce_is_correctable(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_is_correctable);
-/*
- * Notify the user(s) about new machine check events.
- * Can be called from interrupt context, but not from machine check/NMI
- * context.
- */
-static bool mce_notify_irq(void)
-{
- /* Not more than two messages every minute */
- static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
-
- if (test_and_clear_bit(0, &mce_need_notify)) {
- mce_work_trigger();
-
- if (__ratelimit(&ratelimit))
- pr_info(HW_ERR "Machine check events logged\n");
-
- return true;
- }
-
- return false;
-}
-
static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -618,9 +597,7 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
/* Emit the trace record: */
trace_mce_record(err);
- set_bit(0, &mce_need_notify);
-
- mce_notify_irq();
+ mce_work_trigger();
return NOTIFY_DONE;
}
@@ -1804,7 +1781,7 @@ static void mce_timer_fn(struct timer_list *t)
* Alert userspace if needed. If we logged an MCE, reduce the polling
* interval, otherwise increase the polling interval.
*/
- if (mce_notify_irq())
+ if (!mce_gen_pool_empty())
iv = max(iv / 2, (unsigned long) HZ/100);
else
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 260456588756..7bcf1decc034 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -268,6 +268,21 @@ void __init idt_setup_early_pf(void)
}
#endif
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+noinstr void idt_entry_from_kvm(unsigned int vector)
+{
+ if (vector == NMI_VECTOR)
+ return idt_do_nmi_irqoff();
+
+ /*
+ * Only the NMI path requires noinstr.
+ */
+ instrumentation_begin();
+ idt_do_interrupt_irqoff(gate_offset(idt_table + vector));
+ instrumentation_end();
+}
+#endif
+
static void __init idt_map_in_cea(void)
{
/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 3d239ed12744..52a3afb1b79e 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -614,7 +614,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
{
exc_nmi(regs);
}
-EXPORT_SYMBOL_FOR_KVM(asm_exc_nmi_kvm_vmx);
#endif
#ifdef CONFIG_NMI_CHECK_CPU
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 4ffba68dc57b..eaeb77464c06 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -136,6 +136,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
* %r13 original CR4 when relocate_kernel() was invoked
*/
+ /*
+ * Set return address to 0 if not preserving context. The purgatory
+ * shipped in kexec-tools will unconditionally look for the return
+ * address on the stack and set a kexec_jump_back_entry= command
+ * line option if it's non-zero. There's no other way that it can
+ * tell a preserve-context (kjump) kexec from a normal one.
+ */
+ pushq $0
/* store the start address on the stack */
pushq %rdx
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c8c6cc0406d6..8013dccb3110 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4481,7 +4481,7 @@ static const struct opcode opcode_map_0f_38[256] = {
X16(N), X16(N),
/* 0x20 - 0x2f */
X8(N),
- X2(N), GP(SrcReg | DstMem | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
+ X2(N), GP(SrcMem | DstReg | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
/* 0x30 - 0x7f */
X16(N), X16(N), X16(N), X16(N), X16(N),
/* 0x80 - 0xef */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 892246204435..f0144ae8d891 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2526,6 +2526,23 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
__shadow_walk_next(iterator, *iterator->sptep);
}
+/*
+ * Note: while normally KVM uses a "bool flush" return value to let
+ * the caller batch flushes, __link_shadow_page() flushes immediately
+ * before populating the parent PTE with the new shadow page. The
+ * typical callers, direct_map() and FNAME(fetch)(), are not going
+ * to zap more than one huge SPTE anyway.
+ *
+ * The only exception, where @flush can be false, is when a huge SPTE
+ * is replaced with a shadow page SPTE with a fully populated page table,
+ * which can happen from shadow_mmu_split_huge_page(). In this case,
+ * no memory is unmapped across the change to the page tables and no
+ * immediate flush is needed for correctness.
+ *
+ * Even in that case, calls to kvm_mmu_commit_zap_page() are not
+ * batched. Doing so would require adding an invalid_list argument
+ * all the way down to __walk_slot_rmaps().
+ */
static void __link_shadow_page(struct kvm *kvm,
struct kvm_mmu_memory_cache *cache, u64 *sptep,
struct kvm_mmu_page *sp, bool flush)
@@ -2541,8 +2558,10 @@ static void __link_shadow_page(struct kvm *kvm,
parent_sp = sptep_to_sp(sptep);
WARN_ON_ONCE(parent_sp->role.level == PG_LEVEL_4K);
- mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list);
- kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, true);
+ if (mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list))
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ else if (flush)
+ kvm_flush_remote_tlbs_sptep(kvm, sptep);
}
spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp));
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index adf211860949..993b551180fe 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -1300,12 +1300,14 @@ bool __init avic_hardware_setup(void)
}
/*
- * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2)
- * due to erratum 1235, which results in missed VM-Exits on the sender
- * and thus missed wake events for blocking vCPUs due to the CPU
- * failing to see a software update to clear IsRunning.
+ * Disable IPI virtualization for AMD Family 17h (Zen1 and Zen2) and
+ * Hygon Family 18h (derived from AMD Zen1) CPUs due to erratum 1235,
+ * which results in missed VM-Exits on the sender and thus missed wake
+ * events for blocking vCPUs due to the CPU failing to see a software
+ * update to clear IsRunning.
*/
- enable_ipiv = enable_ipiv && boot_cpu_data.x86 != 0x17;
+ if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18)
+ enable_ipiv = false;
amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 961804df5f45..b340dc9991ad 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -160,6 +160,16 @@ void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm)
if (!intercept_smi)
vmcb_clr_intercept(&vmcb02->control, INTERCEPT_SMI);
+ /*
+ * Intercept PAUSE if and only if L1 wants to. KVM intercepts PAUSE so
+ * that a vCPU that may be spinning waiting for a lock can be scheduled
+ * out in favor of the vCPU that holds said lock. KVM doesn't support
+ * yielding across L2 vCPUs, as KVM has limited visilibity into which
+ * L2 vCPUs are in the same L2 VM, i.e. may be contending for locks.
+ */
+ if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE))
+ vmcb_clr_intercept(&vmcb02->control, INTERCEPT_PAUSE);
+
if (nested_vmcb_needs_vls_intercept(svm)) {
/*
* If the virtual VMLOAD/VMSAVE is not enabled for the L2,
@@ -819,7 +829,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
struct kvm_vcpu *vcpu = &svm->vcpu;
- u32 pause_count12, pause_thresh12;
nested_svm_transition_tlb_flush(vcpu);
@@ -947,31 +956,13 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
vmcb02->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER))
- pause_count12 = vmcb12_ctrl->pause_filter_count;
+ vmcb02->control.pause_filter_count = vmcb12_ctrl->pause_filter_count;
else
- pause_count12 = 0;
+ vmcb02->control.pause_filter_count = 0;
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD))
- pause_thresh12 = vmcb12_ctrl->pause_filter_thresh;
+ vmcb02->control.pause_filter_thresh = vmcb12_ctrl->pause_filter_thresh;
else
- pause_thresh12 = 0;
- if (kvm_pause_in_guest(svm->vcpu.kvm)) {
- /* use guest values since host doesn't intercept PAUSE */
- vmcb02->control.pause_filter_count = pause_count12;
- vmcb02->control.pause_filter_thresh = pause_thresh12;
-
- } else {
- /* start from host values otherwise */
- vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
- vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
-
- /* ... but ensure filtering is disabled if so requested. */
- if (vmcb12_is_intercept(vmcb12_ctrl, INTERCEPT_PAUSE)) {
- if (!pause_count12)
- vmcb02->control.pause_filter_count = 0;
- if (!pause_thresh12)
- vmcb02->control.pause_filter_thresh = 0;
- }
- }
+ vmcb02->control.pause_filter_thresh = 0;
/*
* Take ALLOW_LARGER_RAP from vmcb12 even though it should be safe to
@@ -1298,12 +1289,6 @@ void nested_svm_vmexit(struct vcpu_svm *svm)
/* in case we halted in L2 */
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
- if (!kvm_pause_in_guest(vcpu->kvm)) {
- vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
- vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
-
- }
-
/*
* Invalidate last_bus_lock_rip unless KVM is still waiting for the
* guest to make forward progress before re-enabling bus lock detection.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e7fdd7a9c280..e02a38da5296 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -913,7 +913,15 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm))
+ /* Adjusting pause_filter_count makes no sense if PLE is disabled. */
+ WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm));
+
+ /*
+ * While running L2, KVM should intercept PAUSE if and only if L1 wants
+ * to intercept PAUSE, and L1's intercept should take priority, i.e.
+ * KVM should never handle a PAUSE intercept from L2.
+ */
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return;
control->pause_filter_count = __grow_ple_window(old,
@@ -934,7 +942,10 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm))
+ /* Adjusting pause_filter_count makes no sense if PLE is disabled. */
+ WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm));
+
+ if (is_guest_mode(vcpu))
return;
control->pause_filter_count =
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index e7fdbe9efc90..0db25bba17f6 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -154,7 +154,7 @@ TRACE_EVENT(kvm_xen_hypercall,
__entry->a2 = a2;
__entry->a3 = a3;
__entry->a4 = a4;
- __entry->a4 = a5;
+ __entry->a5 = a5;
),
TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx",
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 56cacc06225e..31568274d8bb 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -14,6 +14,7 @@ extern bool __read_mostly flexpriority_enabled;
extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
+extern bool __read_mostly enable_cet;
extern bool __read_mostly enable_pml;
extern int __read_mostly pt_mode;
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 8a481dae9cae..ff1f254a0ef4 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -31,38 +31,6 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
-.macro VMX_DO_EVENT_IRQOFF call_insn call_target
- /*
- * Unconditionally create a stack frame, getting the correct RSP on the
- * stack (for x86-64) would take two instructions anyways, and RBP can
- * be used to restore RSP to make objtool happy (see below).
- */
- push %_ASM_BP
- mov %_ASM_SP, %_ASM_BP
-
-#ifdef CONFIG_X86_64
- /*
- * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
- * creating the synthetic interrupt stack frame for the IRQ/NMI.
- */
- and $-16, %rsp
- push $__KERNEL_DS
- push %rbp
-#endif
- pushf
- push $__KERNEL_CS
- \call_insn \call_target
-
- /*
- * "Restore" RSP from RBP, even though IRET has already unwound RSP to
- * the correct value. objtool doesn't know the callee will IRET and,
- * without the explicit restore, thinks the stack is getting walloped.
- * Using an unwind hint is problematic due to x86-64's dynamic alignment.
- */
- leave
- RET
-.endm
-
.section .noinstr.text, "ax"
/**
@@ -320,10 +288,6 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
SYM_FUNC_END(__vmx_vcpu_run)
-SYM_FUNC_START(vmx_do_nmi_irqoff)
- VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
-SYM_FUNC_END(vmx_do_nmi_irqoff)
-
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
/**
@@ -375,13 +339,3 @@ SYM_FUNC_START(vmread_error_trampoline)
RET
SYM_FUNC_END(vmread_error_trampoline)
#endif
-
-.section .text, "ax"
-
-#ifndef CONFIG_X86_FRED
-
-SYM_FUNC_START(vmx_do_interrupt_irqoff)
- VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
-SYM_FUNC_END(vmx_do_interrupt_irqoff)
-
-#endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5c2c33a5f7dc..b9103de01428 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -108,6 +108,9 @@ module_param_named(unrestricted_guest,
bool __read_mostly enable_ept_ad_bits = 1;
module_param_named(eptad, enable_ept_ad_bits, bool, 0444);
+bool __read_mostly enable_cet = 1;
+module_param_named(cet, enable_cet, bool, 0444);
+
static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, 0444);
@@ -4476,7 +4479,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
* SSP is reloaded from IA32_PL3_SSP. Check SDM Vol.2A/B Chapter
* 3 and 4 for details.
*/
- if (cpu_has_load_cet_ctrl()) {
+ if (enable_cet) {
vmcs_writel(HOST_S_CET, kvm_host.s_cet);
vmcs_writel(HOST_SSP, 0);
vmcs_writel(HOST_INTR_SSP_TABLE, 0);
@@ -4532,6 +4535,10 @@ static u32 vmx_get_initial_vmentry_ctrl(void)
if (vmx_pt_mode_is_system())
vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
VM_ENTRY_LOAD_IA32_RTIT_CTL);
+
+ if (!enable_cet)
+ vmentry_ctrl &= ~VM_ENTRY_LOAD_CET_STATE;
+
/*
* IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically.
*/
@@ -4546,6 +4553,9 @@ static u32 vmx_get_initial_vmexit_ctrl(void)
{
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
+ if (!enable_cet)
+ vmexit_ctrl &= ~VM_EXIT_LOAD_CET_STATE;
+
/*
* Not used by KVM and never set in vmcs01 or vmcs02, but emulated for
* nested virtualization and thus allowed to be set in vmcs12.
@@ -7107,9 +7117,6 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
}
-void vmx_do_interrupt_irqoff(unsigned long entry);
-void vmx_do_nmi_irqoff(void);
-
static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
{
/*
@@ -7151,17 +7158,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
"unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
- /*
- * Invoke the kernel's IRQ handler for the vector. Use the FRED path
- * when it's available even if FRED isn't fully enabled, e.g. even if
- * FRED isn't supported in hardware, in order to avoid the indirect
- * CALL in the non-FRED path.
- */
kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
- if (IS_ENABLED(CONFIG_X86_FRED))
- fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
- else
- vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
+ x86_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
kvm_after_interrupt(vcpu);
vcpu->arch.at_instruction_boundary = true;
@@ -7471,10 +7469,7 @@ noinstr void vmx_handle_nmi(struct kvm_vcpu *vcpu)
return;
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
- if (cpu_feature_enabled(X86_FEATURE_FRED))
- fred_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
- else
- vmx_do_nmi_irqoff();
+ x86_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
kvm_after_interrupt(vcpu);
}
@@ -8155,7 +8150,7 @@ static __init void vmx_set_cpu_caps(void)
* VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code
* fails, so disable CET in this case too.
*/
- if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
+ if (!enable_cet || !enable_unrestricted_guest ||
!cpu_has_vmx_basic_no_hw_errcode_cc()) {
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
@@ -8630,6 +8625,9 @@ __init int vmx_hardware_setup(void)
!cpu_has_vmx_invept_global())
enable_ept = 0;
+ if (!cpu_has_load_cet_ctrl())
+ enable_cet = 0;
+
/* NX support is required for shadow paging. */
if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
pr_err_ratelimited("NX (Execute Disable) not supported\n");
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a1b63c63d1a..c1a72d749084 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4876,7 +4876,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = tdp_enabled;
break;
case KVM_CAP_X86_APIC_BUS_CYCLES_NS:
- r = APIC_BUS_CYCLE_NS_DEFAULT;
+ r = kvm ? kvm->arch.apic_bus_cycle_ns : APIC_BUS_CYCLE_NS_DEFAULT;
break;
case KVM_CAP_EXIT_HYPERCALL:
r = KVM_EXIT_HYPERCALL_VALID_MASK;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index c80d0058efd1..3eee5f84f8a7 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2145,7 +2145,10 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
static void xen_enter_lazy_mmu(void)
{
- enter_lazy(XEN_LAZY_MMU);
+ preempt_disable();
+ if (xen_get_lazy_mode() != XEN_LAZY_MMU)
+ enter_lazy(XEN_LAZY_MMU);
+ preempt_enable();
}
static void xen_flush_lazy_mmu(void)
@@ -2182,7 +2185,8 @@ static void xen_leave_lazy_mmu(void)
{
preempt_disable();
xen_mc_flush();
- leave_lazy(XEN_LAZY_MMU);
+ if (xen_get_lazy_mode() != XEN_LAZY_NONE)
+ leave_lazy(XEN_LAZY_MMU);
preempt_enable();
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index bb95a05259b8..41251d4cf953 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -655,7 +655,7 @@ static void __init xen_e820_swap_entry_with_ram(struct e820_entry *swap_entry)
/* Fill new entry (keep size and page offset). */
entry->type = swap_entry->type;
entry->addr = entry_end - swap_size +
- swap_addr - swap_entry->addr;
+ swap_entry->addr - swap_addr;
entry->size = swap_entry->size;
/* Convert old entry to RAM, align to pages. */
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 13fe45dea296..e57af619263a 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -6,5 +6,6 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += qrwlock.h
generic-y += qspinlock.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += text-patching.h